From 78f54de4939fa12e24ecc6681809b8f724188759 Mon Sep 17 00:00:00 2001 From: Lioncash Date: Tue, 16 Jul 2019 11:22:40 -0400 Subject: [PATCH 01/10] video_core/shader/decode: Prevent sign-conversion warnings Makes it explicit that the conversions here are intentional. --- src/video_core/shader/decode.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp index afffd157fa..b547d83230 100644 --- a/src/video_core/shader/decode.cpp +++ b/src/video_core/shader/decode.cpp @@ -47,14 +47,14 @@ void ShaderIR::Decode() { if (shader_info.decompilable) { disable_flow_stack = true; const auto insert_block = [this](NodeBlock& nodes, u32 label) { - if (label == exit_branch) { + if (label == static_cast(exit_branch)) { return; } basic_blocks.insert({label, nodes}); }; const auto& blocks = shader_info.blocks; NodeBlock current_block; - u32 current_label = exit_branch; + u32 current_label = static_cast(exit_branch); for (auto& block : blocks) { if (shader_info.labels.count(block.start) != 0) { insert_block(current_block, current_label); From 1bad7650ec1d288a14be6d7b6350441df7dace99 Mon Sep 17 00:00:00 2001 From: Lioncash Date: Tue, 16 Jul 2019 11:32:35 -0400 Subject: [PATCH 02/10] video_core/control_flow: Place all internally linked types/functions within an anonymous namespace Previously, quite a few functions were being linked with external linkage. --- src/video_core/shader/control_flow.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp index fdcc970ff8..440729258c 100644 --- a/src/video_core/shader/control_flow.cpp +++ b/src/video_core/shader/control_flow.cpp @@ -15,7 +15,7 @@ #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { - +namespace { using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; @@ -411,6 +411,7 @@ bool TryQuery(CFGRebuildState& state) { state.queries.push_back(conditional_query); return true; } +} // Anonymous namespace std::optional ScanFlow(const ProgramCode& program_code, u32 program_size, u32 start_address) { From fcc59b55f7ee8dbcab211abd9734cfc8643526e9 Mon Sep 17 00:00:00 2001 From: Lioncash Date: Tue, 16 Jul 2019 11:35:33 -0400 Subject: [PATCH 03/10] video_core/control_flow: Make program_size for ScanFlow() a std::size_t Prevents a truncation warning from occurring with MSVC. Also the internal data structures already treat it as a size_t, so this is just a discrepancy in the interface. --- src/video_core/shader/control_flow.cpp | 4 ++-- src/video_core/shader/control_flow.h | 5 ++--- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp index 440729258c..20f9a64809 100644 --- a/src/video_core/shader/control_flow.cpp +++ b/src/video_core/shader/control_flow.cpp @@ -413,8 +413,8 @@ bool TryQuery(CFGRebuildState& state) { } } // Anonymous namespace -std::optional ScanFlow(const ProgramCode& program_code, u32 program_size, - u32 start_address) { +std::optional ScanFlow(const ProgramCode& program_code, + std::size_t program_size, u32 start_address) { CFGRebuildState state{program_code, program_size, start_address}; // Inspect Code and generate blocks state.labels.clear(); diff --git a/src/video_core/shader/control_flow.h b/src/video_core/shader/control_flow.h index 5e8ea32716..728286d70c 100644 --- a/src/video_core/shader/control_flow.h +++ b/src/video_core/shader/control_flow.h @@ -4,7 +4,6 @@ #pragma once -#include #include #include #include @@ -57,7 +56,7 @@ struct ShaderCharacteristics { std::unordered_set labels{}; }; -std::optional ScanFlow(const ProgramCode& program_code, u32 program_size, - u32 start_address); +std::optional ScanFlow(const ProgramCode& program_code, + std::size_t program_size, u32 start_address); } // namespace VideoCommon::Shader From f6250ef163591e557415201768bb2c223b31881c Mon Sep 17 00:00:00 2001 From: Lioncash Date: Tue, 16 Jul 2019 11:38:48 -0400 Subject: [PATCH 04/10] video_core: Resolve -Wreorder warnings Ensures that the constructor members are always initialized in the order that they're declared in. --- src/video_core/shader/control_flow.cpp | 2 +- src/video_core/texture_cache/surface_base.cpp | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp index 20f9a64809..6a92c540ee 100644 --- a/src/video_core/shader/control_flow.cpp +++ b/src/video_core/shader/control_flow.cpp @@ -58,7 +58,7 @@ struct BlockInfo { struct CFGRebuildState { explicit CFGRebuildState(const ProgramCode& program_code, const std::size_t program_size, const u32 start) - : program_code{program_code}, program_size{program_size}, start{start} {} + : start{start}, program_code{program_code}, program_size{program_size} {} u32 start{}; std::vector block_info{}; diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp index 6af9044cad..683c492072 100644 --- a/src/video_core/texture_cache/surface_base.cpp +++ b/src/video_core/texture_cache/surface_base.cpp @@ -24,9 +24,8 @@ StagingCache::StagingCache() = default; StagingCache::~StagingCache() = default; SurfaceBaseImpl::SurfaceBaseImpl(GPUVAddr gpu_addr, const SurfaceParams& params) - : params{params}, mipmap_sizes(params.num_levels), - mipmap_offsets(params.num_levels), gpu_addr{gpu_addr}, host_memory_size{ - params.GetHostSizeInBytes()} { + : params{params}, host_memory_size{params.GetHostSizeInBytes()}, gpu_addr{gpu_addr}, + mipmap_sizes(params.num_levels), mipmap_offsets(params.num_levels) { std::size_t offset = 0; for (u32 level = 0; level < params.num_levels; ++level) { const std::size_t mipmap_size{params.GetGuestMipmapSize(level)}; From da307b1c61f9c4e505f19c80e2fcf2785a4c6e50 Mon Sep 17 00:00:00 2001 From: Lioncash Date: Tue, 16 Jul 2019 11:40:58 -0400 Subject: [PATCH 05/10] video_core/control_flow: Use empty() member function for checking emptiness It's what it's there for. --- src/video_core/shader/control_flow.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp index 6a92c540ee..83549c082c 100644 --- a/src/video_core/shader/control_flow.cpp +++ b/src/video_core/shader/control_flow.cpp @@ -379,8 +379,8 @@ bool TryQuery(CFGRebuildState& state) { // consumes a label. Schedule new queries accordingly if (block.visited) { BlockStack& stack = state.stacks[q.address]; - const bool all_okay = (stack.ssy_stack.size() == 0 || q.ssy_stack == stack.ssy_stack) && - (stack.pbk_stack.size() == 0 || q.pbk_stack == stack.pbk_stack); + const bool all_okay = (stack.ssy_stack.empty() || q.ssy_stack == stack.ssy_stack) && + (stack.pbk_stack.empty() || q.pbk_stack == stack.pbk_stack); state.queries.pop_front(); return all_okay; } From 0d287d3551108a600f00589caba9408b3ff12f62 Mon Sep 17 00:00:00 2001 From: Lioncash Date: Tue, 16 Jul 2019 11:42:05 -0400 Subject: [PATCH 06/10] video_core/control_flow: Use the prefix variant of operator++ for iterators Same thing, but potentially allows a standard library implementation to pick a more efficient codepath. --- src/video_core/shader/control_flow.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp index 83549c082c..c2243337cf 100644 --- a/src/video_core/shader/control_flow.cpp +++ b/src/video_core/shader/control_flow.cpp @@ -365,7 +365,7 @@ bool TryQuery(CFGRebuildState& state) { const auto gather_end = labels.upper_bound(block.end); while (gather_start != gather_end) { cc.push(gather_start->second); - gather_start++; + ++gather_start; } }; if (state.queries.empty()) { @@ -470,7 +470,7 @@ std::optional ScanFlow(const ProgramCode& program_code, continue; } back = next; - next++; + ++next; } return {result_out}; } From 095259a1358b23e343443b79a515dda9141d8f1f Mon Sep 17 00:00:00 2001 From: Lioncash Date: Tue, 16 Jul 2019 11:50:36 -0400 Subject: [PATCH 07/10] video_core/control_flow: Use std::move where applicable Results in less work being done where avoidable. --- src/video_core/shader/control_flow.cpp | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp index c2243337cf..4d500320a0 100644 --- a/src/video_core/shader/control_flow.cpp +++ b/src/video_core/shader/control_flow.cpp @@ -371,10 +371,11 @@ bool TryQuery(CFGRebuildState& state) { if (state.queries.empty()) { return false; } + Query& q = state.queries.front(); const u32 block_index = state.registered[q.address]; BlockInfo& block = state.block_info[block_index]; - // If the block is visted, check if the stacks match, else gather the ssy/pbk + // If the block is visited, check if the stacks match, else gather the ssy/pbk // labels into the current stack and look if the branch at the end of the block // consumes a label. Schedule new queries accordingly if (block.visited) { @@ -385,7 +386,8 @@ bool TryQuery(CFGRebuildState& state) { return all_okay; } block.visited = true; - state.stacks[q.address] = BlockStack{q}; + state.stacks.insert_or_assign(q.address, BlockStack{q}); + Query q2(q); state.queries.pop_front(); gather_labels(q2.ssy_stack, state.ssy_labels, block); @@ -394,6 +396,7 @@ bool TryQuery(CFGRebuildState& state) { q2.address = block.end + 1; state.queries.push_back(q2); } + Query conditional_query{q2}; if (block.branch.is_sync) { if (block.branch.address == unassigned_branch) { @@ -408,7 +411,7 @@ bool TryQuery(CFGRebuildState& state) { conditional_query.pbk_stack.pop(); } conditional_query.address = block.branch.address; - state.queries.push_back(conditional_query); + state.queries.push_back(std::move(conditional_query)); return true; } } // Anonymous namespace @@ -416,6 +419,7 @@ bool TryQuery(CFGRebuildState& state) { std::optional ScanFlow(const ProgramCode& program_code, std::size_t program_size, u32 start_address) { CFGRebuildState state{program_code, program_size, start_address}; + // Inspect Code and generate blocks state.labels.clear(); state.labels.emplace(start_address); @@ -425,10 +429,9 @@ std::optional ScanFlow(const ProgramCode& program_code, return {}; } } + // Decompile Stacks - Query start_query{}; - start_query.address = state.start; - state.queries.push_back(start_query); + state.queries.push_back(Query{state.start, {}, {}}); bool decompiled = true; while (!state.queries.empty()) { if (!TryQuery(state)) { @@ -436,14 +439,15 @@ std::optional ScanFlow(const ProgramCode& program_code, break; } } + // Sort and organize results std::sort(state.block_info.begin(), state.block_info.end(), - [](const BlockInfo& a, const BlockInfo& b) -> bool { return a.start < b.start; }); + [](const BlockInfo& a, const BlockInfo& b) { return a.start < b.start; }); ShaderCharacteristics result_out{}; result_out.decompilable = decompiled; result_out.start = start_address; result_out.end = start_address; - for (auto& block : state.block_info) { + for (const auto& block : state.block_info) { ShaderBlock new_block{}; new_block.start = block.start; new_block.end = block.end; @@ -458,8 +462,9 @@ std::optional ScanFlow(const ProgramCode& program_code, } if (result_out.decompilable) { result_out.labels = std::move(state.labels); - return {result_out}; + return {std::move(result_out)}; } + // If it's not decompilable, merge the unlabelled blocks together auto back = result_out.blocks.begin(); auto next = std::next(back); @@ -472,6 +477,6 @@ std::optional ScanFlow(const ProgramCode& program_code, back = next; ++next; } - return {result_out}; + return {std::move(result_out)}; } } // namespace VideoCommon::Shader From c3dd5c7667ae9c79cfb515901adfe18ef36e56c3 Mon Sep 17 00:00:00 2001 From: Lioncash Date: Tue, 16 Jul 2019 11:52:08 -0400 Subject: [PATCH 08/10] video_core/control_flow: Remove unnecessary BlockStack copy constructor This is the default behavior of the copy constructor, so it doesn't need to be specified. While we're at it we can make the other non-default constructor explicit. --- src/video_core/shader/control_flow.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp index 4d500320a0..37792d420e 100644 --- a/src/video_core/shader/control_flow.cpp +++ b/src/video_core/shader/control_flow.cpp @@ -29,8 +29,7 @@ struct Query { struct BlockStack { BlockStack() = default; - BlockStack(const BlockStack& b) = default; - BlockStack(const Query& q) : ssy_stack{q.ssy_stack}, pbk_stack{q.pbk_stack} {} + explicit BlockStack(const Query& q) : ssy_stack{q.ssy_stack}, pbk_stack{q.pbk_stack} {} std::stack ssy_stack{}; std::stack pbk_stack{}; }; From e79217859845550778d2919eb22adb1ca590b6bb Mon Sep 17 00:00:00 2001 From: Lioncash Date: Tue, 16 Jul 2019 11:56:37 -0400 Subject: [PATCH 09/10] video_core/control_flow: Prevent sign conversion in TryGetBlock() The return value is a u32, not an s32, so this would result in an implicit signedness conversion. --- src/video_core/shader/control_flow.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp index 37792d420e..ec3a766900 100644 --- a/src/video_core/shader/control_flow.cpp +++ b/src/video_core/shader/control_flow.cpp @@ -84,7 +84,7 @@ std::pair TryGetBlock(CFGRebuildState& state, u32 address) return {BlockCollision::Inside, index}; } } - return {BlockCollision::None, -1}; + return {BlockCollision::None, 0xFFFFFFFF}; } struct ParseInfo { From 2f1921b8f4a301899d1405fb09f8bfa8fb091ab0 Mon Sep 17 00:00:00 2001 From: Lioncash Date: Tue, 16 Jul 2019 11:59:57 -0400 Subject: [PATCH 10/10] video_core/control_flow: Provide operator!= for types with operator== Provides operational symmetry for the respective structures. --- src/video_core/shader/control_flow.h | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/src/video_core/shader/control_flow.h b/src/video_core/shader/control_flow.h index 728286d70c..b0a5e4f8c9 100644 --- a/src/video_core/shader/control_flow.h +++ b/src/video_core/shader/control_flow.h @@ -25,27 +25,44 @@ struct Condition { bool IsUnconditional() const { return predicate == Pred::UnusedIndex && cc == ConditionCode::T; } + bool operator==(const Condition& other) const { return std::tie(predicate, cc) == std::tie(other.predicate, other.cc); } + + bool operator!=(const Condition& other) const { + return !operator==(other); + } }; struct ShaderBlock { - u32 start{}; - u32 end{}; - bool ignore_branch{}; struct Branch { Condition cond{}; bool kills{}; s32 address{}; + bool operator==(const Branch& b) const { return std::tie(cond, kills, address) == std::tie(b.cond, b.kills, b.address); } - } branch{}; + + bool operator!=(const Branch& b) const { + return !operator==(b); + } + }; + + u32 start{}; + u32 end{}; + bool ignore_branch{}; + Branch branch{}; + bool operator==(const ShaderBlock& sb) const { return std::tie(start, end, ignore_branch, branch) == std::tie(sb.start, sb.end, sb.ignore_branch, sb.branch); } + + bool operator!=(const ShaderBlock& sb) const { + return !operator==(sb); + } }; struct ShaderCharacteristics {