diff --git a/externals/dynarmic/src/dynarmic/CMakeLists.txt b/externals/dynarmic/src/dynarmic/CMakeLists.txt
index a43c9eae10..a91ee4cfda 100644
--- a/externals/dynarmic/src/dynarmic/CMakeLists.txt
+++ b/externals/dynarmic/src/dynarmic/CMakeLists.txt
@@ -53,8 +53,6 @@ add_library(dynarmic
     common/lut_from_list.h
     common/math_util.cpp
     common/math_util.h
-    common/memory_pool.cpp
-    common/memory_pool.h
     common/safe_ops.h
     common/spin_lock.h
     common/string_util.h
@@ -153,6 +151,7 @@ if ("A64" IN_LIST DYNARMIC_FRONTENDS)
         ir/opt/a64_callback_config_pass.cpp
         ir/opt/a64_get_set_elimination_pass.cpp
         ir/opt/a64_merge_interpret_blocks.cpp
+        ir/opt/x64_peepholes.cpp
     )
 endif()
diff --git a/externals/dynarmic/src/dynarmic/backend/x64/emit_x64.cpp b/externals/dynarmic/src/dynarmic/backend/x64/emit_x64.cpp
index b9a705813f..ba3244eb65 100644
--- a/externals/dynarmic/src/dynarmic/backend/x64/emit_x64.cpp
+++ b/externals/dynarmic/src/dynarmic/backend/x64/emit_x64.cpp
@@ -35,11 +35,6 @@ EmitContext::EmitContext(RegAlloc& reg_alloc, IR::Block& block)
 
 EmitContext::~EmitContext() = default;
 
-void EmitContext::EraseInstruction(IR::Inst* inst) {
-    block.Instructions().erase(inst);
-    inst->ClearArgs();
-}
-
 EmitX64::EmitX64(BlockOfCode& code)
         : code(code) {
     exception_handler.Register(code);
diff --git a/externals/dynarmic/src/dynarmic/backend/x64/emit_x64.h b/externals/dynarmic/src/dynarmic/backend/x64/emit_x64.h
index fbe749b2ab..3c885192b5 100644
--- a/externals/dynarmic/src/dynarmic/backend/x64/emit_x64.h
+++ b/externals/dynarmic/src/dynarmic/backend/x64/emit_x64.h
@@ -54,10 +54,7 @@ struct EmitContext {
     EmitContext(RegAlloc& reg_alloc, IR::Block& block);
     virtual ~EmitContext();
 
-    void EraseInstruction(IR::Inst* inst);
-
     virtual FP::FPCR FPCR(bool fpcr_controlled = true) const = 0;
-    virtual bool HasOptimization(OptimizationFlag flag) const = 0;
 
     RegAlloc& reg_alloc;
diff --git a/externals/dynarmic/src/dynarmic/backend/x64/emit_x64_crc32.cpp b/externals/dynarmic/src/dynarmic/backend/x64/emit_x64_crc32.cpp
index 842a8612ee..736e15b195 100644
--- a/externals/dynarmic/src/dynarmic/backend/x64/emit_x64_crc32.cpp
+++ b/externals/dynarmic/src/dynarmic/backend/x64/emit_x64_crc32.cpp
@@ -40,7 +40,6 @@ static void EmitCRC32Castagnoli(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, const int data_size) {
 
 static void EmitCRC32ISO(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, const int data_size) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-
     if (code.HasHostFeature(HostFeature::PCLMULQDQ) && data_size < 32) {
         const Xbyak::Reg32 crc = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
         const Xbyak::Reg64 value = ctx.reg_alloc.UseScratchGpr(args[1]);
@@ -69,10 +68,7 @@ static void EmitCRC32ISO(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, const int data_size) {
         code.pextrd(crc, xmm_value, 2);
 
         ctx.reg_alloc.DefineValue(inst, crc);
-        return;
-    }
-
-    if (code.HasHostFeature(HostFeature::PCLMULQDQ) && data_size == 32) {
+    } else if (code.HasHostFeature(HostFeature::PCLMULQDQ) && data_size == 32) {
         const Xbyak::Reg32 crc = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
         const Xbyak::Reg32 value = ctx.reg_alloc.UseGpr(args[1]).cvt32();
         const Xbyak::Xmm xmm_value = ctx.reg_alloc.ScratchXmm();
@@ -90,10 +86,7 @@ static void EmitCRC32ISO(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, const int data_size) {
         code.pextrd(crc, xmm_value, 2);
 
         ctx.reg_alloc.DefineValue(inst, crc);
-        return;
-    }
-
-    if (code.HasHostFeature(HostFeature::PCLMULQDQ) && data_size == 64) {
+    } else if (code.HasHostFeature(HostFeature::PCLMULQDQ) && data_size == 64) {
         const Xbyak::Reg32 crc = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
         const Xbyak::Reg64 value = ctx.reg_alloc.UseGpr(args[1]);
         const Xbyak::Xmm xmm_value = ctx.reg_alloc.ScratchXmm();
@@ -111,12 +104,11 @@ static void EmitCRC32ISO(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, const int data_size) {
         code.pextrd(crc, xmm_value, 2);
 
         ctx.reg_alloc.DefineValue(inst, crc);
-        return;
+    } else {
+        ctx.reg_alloc.HostCall(inst, args[0], args[1], {});
+        code.mov(code.ABI_PARAM3, data_size / CHAR_BIT);
+        code.CallFunction(&CRC32::ComputeCRC32ISO);
     }
-
-    ctx.reg_alloc.HostCall(inst, args[0], args[1], {});
-    code.mov(code.ABI_PARAM3, data_size / CHAR_BIT);
-    code.CallFunction(&CRC32::ComputeCRC32ISO);
 }
 
 void EmitX64::EmitCRC32Castagnoli8(EmitContext& ctx, IR::Inst* inst) {
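
Note: EmitCRC32ISO above now picks between the three PCLMULQDQ fast paths and the software fallback with one if/else-if chain instead of early returns; the emitted code is unchanged. The fallback host-calls CRC32::ComputeCRC32ISO with the byte count in ABI_PARAM3. For readers unfamiliar with the checksum involved, here is a minimal bitwise sketch of CRC-32/ISO-HDLC (reflected polynomial 0xEDB88320), purely illustrative and independent of dynarmic's actual helper:

    #include <cstddef>
    #include <cstdint>

    // Bit-at-a-time CRC-32 (ISO-HDLC), the same checksum the fallback computes.
    std::uint32_t crc32_iso(std::uint32_t crc, const unsigned char* data, std::size_t len) {
        crc = ~crc;
        for (std::size_t i = 0; i < len; ++i) {
            crc ^= data[i];
            for (int bit = 0; bit < 8; ++bit)
                crc = (crc >> 1) ^ (0xEDB88320u & (0u - (crc & 1u)));
        }
        return ~crc;
    }
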
diff --git a/externals/dynarmic/src/dynarmic/backend/x64/emit_x64_vector_floating_point.cpp b/externals/dynarmic/src/dynarmic/backend/x64/emit_x64_vector_floating_point.cpp
index 88d0786b03..f67fd53467 100644
--- a/externals/dynarmic/src/dynarmic/backend/x64/emit_x64_vector_floating_point.cpp
+++ b/externals/dynarmic/src/dynarmic/backend/x64/emit_x64_vector_floating_point.cpp
@@ -236,23 +236,19 @@ void DenormalsAreZero(BlockOfCode& code, FP::FPCR fpcr, std::initializer_list<Xbyak::Xmm> to_daz, Xbyak::Xmm tmp) {
             FCODE(vmovap)(tmp, code.BConst<fsize>(xword, denormal_to_zero));
-
-            for (const Xbyak::Xmm& xmm : to_daz) {
+            for (const Xbyak::Xmm& xmm : to_daz)
                 FCODE(vfixupimmp)(xmm, xmm, tmp, u8(0));
-            }
-            return;
-        }
-
-        if (fpcr.RMode() != FP::RoundingMode::TowardsMinusInfinity) {
-            code.movaps(tmp, GetNegativeZeroVector(code));
         } else {
-            code.xorps(tmp, tmp);
-        }
-        for (const Xbyak::Xmm& xmm : to_daz) {
-            FCODE(addp)(xmm, tmp);
+            if (fpcr.RMode() != FP::RoundingMode::TowardsMinusInfinity) {
+                code.movaps(tmp, GetNegativeZeroVector(code));
+            } else {
+                code.xorps(tmp, tmp);
+            }
+            for (const Xbyak::Xmm& xmm : to_daz)
+                FCODE(addp)(xmm, tmp);
         }
     }
 }
diff --git a/externals/dynarmic/src/dynarmic/common/memory_pool.cpp b/externals/dynarmic/src/dynarmic/common/memory_pool.cpp
deleted file mode 100644
index f41dd92af5..0000000000
--- a/externals/dynarmic/src/dynarmic/common/memory_pool.cpp
+++ /dev/null
@@ -1,13 +0,0 @@
-/* This file is part of the dynarmic project.
- * Copyright (c) 2016 MerryMage
- * SPDX-License-Identifier: 0BSD
- */
-
-#include "dynarmic/common/memory_pool.h"
-
-#include <cstdlib>
-
-namespace Dynarmic::Common {
-
-
-} // namespace Dynarmic::Common
diff --git a/externals/dynarmic/src/dynarmic/common/memory_pool.h b/externals/dynarmic/src/dynarmic/common/memory_pool.h
deleted file mode 100644
index c99316e107..0000000000
--- a/externals/dynarmic/src/dynarmic/common/memory_pool.h
+++ /dev/null
@@ -1,61 +0,0 @@
-/* This file is part of the dynarmic project.
- * Copyright (c) 2016 MerryMage
- * SPDX-License-Identifier: 0BSD
- */
-
-#pragma once
-
-#include <cstdlib>
-#include <vector>
-
-namespace Dynarmic::Common {
-
-/// @tparam object_size Byte-size of objects to construct
-/// @tparam slab_size Number of objects to have per slab
-template<size_t object_size, size_t slab_size>
-class Pool {
-public:
-    inline Pool() noexcept {
-        AllocateNewSlab();
-    }
-    inline ~Pool() noexcept {
-        std::free(current_slab);
-        for (char* slab : slabs) {
-            std::free(slab);
-        }
-    }
-
-    Pool(const Pool&) = delete;
-    Pool(Pool&&) = delete;
-
-    Pool& operator=(const Pool&) = delete;
-    Pool& operator=(Pool&&) = delete;
-
-    /// @brief Returns a pointer to an `object_size`-bytes block of memory.
-    [[nodiscard]] void* Alloc() noexcept {
-        if (remaining == 0) {
-            slabs.push_back(current_slab);
-            AllocateNewSlab();
-        }
-        void* ret = static_cast<void*>(current_ptr);
-        current_ptr += object_size;
-        remaining--;
-        return ret;
-    }
-private:
-    /// @brief Allocates a completely new memory slab.
-    /// Used when an entirely new slab is needed
-    /// due the current one running out of usable space.
-    void AllocateNewSlab() noexcept {
-        current_slab = static_cast<char*>(std::malloc(object_size * slab_size));
-        current_ptr = current_slab;
-        remaining = slab_size;
-    }
-
-    std::vector<char*> slabs;
-    char* current_slab = nullptr;
-    char* current_ptr = nullptr;
-    size_t remaining = 0;
-};
-
-} // namespace Dynarmic::Common
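
Note: the two deletions above remove the slab allocator that used to back IR instruction storage. The Pool handed out fixed-size slots carved from malloc'd slabs and never freed individual objects; instructions were constructed into those slots with placement new, and the slabs were released wholesale when the pool died. After this change each IR::Inst is an ordinary heap object (see the basic_block.cpp hunk below). A self-contained sketch of the two allocation styles, for illustration only:

    #include <cstdlib>
    #include <new>

    struct Inst { int opcode = 0; };

    int main() {
        // Pool style (removed): storage comes from a slab, construction is in place.
        void* slot = std::malloc(sizeof(Inst));  // stand-in for pool->Alloc()
        Inst* a = new (slot) Inst();             // placement new: no allocation here
        a->~Inst();
        std::free(slot);

        // Plain style (current): allocation and construction in one step.
        Inst* b = new Inst();
        delete b;
    }
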
diff --git a/externals/dynarmic/src/dynarmic/ir/basic_block.cpp b/externals/dynarmic/src/dynarmic/ir/basic_block.cpp
index c818fe0b17..8b5b5972a4 100644
--- a/externals/dynarmic/src/dynarmic/ir/basic_block.cpp
+++ b/externals/dynarmic/src/dynarmic/ir/basic_block.cpp
@@ -13,7 +13,6 @@
 #include <fmt/format.h>
 #include <mcl/assert.hpp>
 
-#include "dynarmic/common/memory_pool.h"
 #include "dynarmic/frontend/A32/a32_types.h"
 #include "dynarmic/frontend/A64/a64_types.h"
 #include "dynarmic/ir/cond.h"
@@ -24,8 +23,7 @@ namespace Dynarmic::IR {
 
 Block::Block(const LocationDescriptor& location)
         : location{location},
          end_location{location},
-         cond{Cond::AL},
-         instruction_alloc_pool{std::make_unique<Common::Pool<sizeof(Inst), 2097152UL / sizeof(Inst)>>()}
+         cond{Cond::AL}
 {
 }
 
@@ -37,7 +35,7 @@ Block::Block(const LocationDescriptor& location)
 /// @param args A sequence of Value instances used as arguments for the instruction.
 /// @returns Iterator to the newly created instruction.
 Block::iterator Block::PrependNewInst(iterator insertion_point, Opcode opcode, std::initializer_list<Value> args) noexcept {
-    IR::Inst* inst = new (instruction_alloc_pool->Alloc()) IR::Inst(opcode);
+    IR::Inst* inst = new IR::Inst(opcode);
     DEBUG_ASSERT(args.size() == inst->NumArgs());
     std::for_each(args.begin(), args.end(), [&inst, index = size_t(0)](const auto& arg) mutable {
         inst->SetArg(index, arg);
@@ -83,9 +81,7 @@ static std::string TerminalToString(const Terminal& terminal_variant) noexcept {
 }
 
 std::string DumpBlock(const IR::Block& block) noexcept {
-    std::string ret;
-
-    ret += fmt::format("Block: location={}\n", block.Location());
+    std::string ret = fmt::format("Block: location={}\n", block.Location());
     ret += fmt::format("cycles={}", block.CycleCount());
     ret += fmt::format(", entry_cond={}", A64::CondToString(block.GetCondition()));
     if (block.GetCondition() != Cond::AL) {
@@ -113,6 +109,8 @@ std::string DumpBlock(const IR::Block& block) noexcept {
             return fmt::format("#{:#x}", arg.GetU32());
         case Type::U64:
             return fmt::format("#{:#x}", arg.GetU64());
+        case Type::U128:
+            return fmt::format("#<u128 imm>");
         case Type::A32Reg:
             return A32::RegToString(arg.GetA32RegRef());
         case Type::A32ExtReg:
@@ -155,14 +153,9 @@ std::string DumpBlock(const IR::Block& block) noexcept {
             ret += fmt::format("<type error: {} != {}>", GetNameOf(actual_type), GetNameOf(expected_type));
         }
     }
-
-        ret += fmt::format(" (uses: {})", inst.UseCount());
-
-        ret += '\n';
+        ret += fmt::format(" (uses: {})\n", inst.UseCount());
     }
-
     ret += "terminal = " + TerminalToString(block.GetTerminal()) + '\n';
-
     return ret;
 }
diff --git a/externals/dynarmic/src/dynarmic/ir/basic_block.h b/externals/dynarmic/src/dynarmic/ir/basic_block.h
index 6608f0e3a2..b1ccf672a0 100644
--- a/externals/dynarmic/src/dynarmic/ir/basic_block.h
+++ b/externals/dynarmic/src/dynarmic/ir/basic_block.h
@@ -17,8 +17,6 @@
 #include "dynarmic/ir/microinstruction.h"
 #include "dynarmic/ir/terminal.h"
 #include "dynarmic/ir/value.h"
-#include "dynarmic/ir/dense_list.h"
-#include "dynarmic/common/memory_pool.h"
 
 namespace Dynarmic::IR {
 
@@ -76,7 +74,7 @@ public:
     /// @param op Opcode representing the instruction to add.
     /// @param args A sequence of Value instances used as arguments for the instruction.
     inline void AppendNewInst(const Opcode opcode, const std::initializer_list<Value> args) noexcept {
-        PrependNewInst(end(), opcode, args);
+        PrependNewInst(instructions.end(), opcode, args);
     }
 
     iterator PrependNewInst(iterator insertion_point, Opcode op, std::initializer_list<Value> args) noexcept;
@@ -171,8 +169,6 @@ private:
     LocationDescriptor end_location;
     /// Conditional to pass in order to execute this block
     Cond cond;
-    /// Memory pool for instruction list
-    std::unique_ptr<Common::Pool<sizeof(Inst), 2097152UL / sizeof(Inst)>> instruction_alloc_pool;
     /// Terminal instruction of this block.
     Terminal terminal = Term::Invalid{};
     /// Number of cycles this block takes to execute if the conditional fails.
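
Note: AppendNewInst is now written directly against the underlying container: inserting before instructions.end() is exactly an append. The equivalence, sketched with a plain std::list rather than dynarmic's instruction list:

    #include <cassert>
    #include <list>

    int main() {
        std::list<int> xs = {1, 2, 3};
        xs.insert(xs.end(), 4);  // "insert before end()" ...
        assert(xs.back() == 4);  // ... is an append
    }
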
"dynarmic/ir/value.h" -#include "dynarmic/ir/dense_list.h" -#include "dynarmic/common/memory_pool.h" namespace Dynarmic::IR { @@ -76,7 +74,7 @@ public: /// @param op Opcode representing the instruction to add. /// @param args A sequence of Value instances used as arguments for the instruction. inline void AppendNewInst(const Opcode opcode, const std::initializer_list args) noexcept { - PrependNewInst(end(), opcode, args); + PrependNewInst(instructions.end(), opcode, args); } iterator PrependNewInst(iterator insertion_point, Opcode op, std::initializer_list args) noexcept; @@ -171,8 +169,6 @@ private: LocationDescriptor end_location; /// Conditional to pass in order to execute this block Cond cond; - /// Memory pool for instruction list - std::unique_ptr> instruction_alloc_pool; /// Terminal instruction of this block. Terminal terminal = Term::Invalid{}; /// Number of cycles this block takes to execute if the conditional fails. diff --git a/externals/dynarmic/src/dynarmic/ir/dense_list.h b/externals/dynarmic/src/dynarmic/ir/dense_list.h deleted file mode 100644 index a399c12d0b..0000000000 --- a/externals/dynarmic/src/dynarmic/ir/dense_list.h +++ /dev/null @@ -1,58 +0,0 @@ -#pragma once - -#include -#include -#include - -namespace Dynarmic { - template struct dense_list { - using difference_type = std::ptrdiff_t; - using size_type = std::size_t; - using value_type = T; - using pointer = value_type*; - using const_pointer = const value_type*; - using reference = value_type&; - using const_reference = const value_type&; - using iterator = std::deque::iterator; - using const_iterator = std::deque::const_iterator; - using reverse_iterator = std::reverse_iterator; - using const_reverse_iterator = std::reverse_iterator; - - inline bool empty() const noexcept { return list.empty(); } - inline size_type size() const noexcept { return list.size(); } - - inline value_type& front() noexcept { return list.front(); } - inline const value_type& front() const noexcept { return list.front(); } - - inline value_type& back() noexcept { return list.back(); } - inline const value_type& back() const noexcept { return list.back(); } - - inline iterator begin() noexcept { return list.begin(); } - inline const_iterator begin() const noexcept { return list.begin(); } - inline iterator end() noexcept { return list.end(); } - inline const_iterator end() const noexcept { return list.end(); } - - inline reverse_iterator rbegin() noexcept { return list.rbegin(); } - inline const_reverse_iterator rbegin() const noexcept { return list.rbegin(); } - inline reverse_iterator rend() noexcept { return list.rend(); } - inline const_reverse_iterator rend() const noexcept { return list.rend(); } - - inline const_iterator cbegin() const noexcept { return list.cbegin(); } - inline const_iterator cend() const noexcept { return list.cend(); } - - inline const_reverse_iterator crbegin() const noexcept { return list.crbegin(); } - inline const_reverse_iterator crend() const noexcept { return list.crend(); } - - inline iterator insert_before(iterator it, value_type& value) noexcept { - if (it == list.begin()) { - list.push_front(value); - return list.begin(); - } - auto const index = std::distance(list.begin(), it - 1); - list.insert(it - 1, value); - return list.begin() + index; - } - - std::deque list; - }; -} diff --git a/externals/dynarmic/src/dynarmic/ir/ir_emitter.h b/externals/dynarmic/src/dynarmic/ir/ir_emitter.h index 23cfb47498..6f216089b9 100644 --- a/externals/dynarmic/src/dynarmic/ir/ir_emitter.h +++ 
diff --git a/externals/dynarmic/src/dynarmic/ir/opt/a32_get_set_elimination_pass.cpp b/externals/dynarmic/src/dynarmic/ir/opt/a32_get_set_elimination_pass.cpp
index 06e159ba48..fe46a9341d 100644
--- a/externals/dynarmic/src/dynarmic/ir/opt/a32_get_set_elimination_pass.cpp
+++ b/externals/dynarmic/src/dynarmic/ir/opt/a32_get_set_elimination_pass.cpp
@@ -22,8 +22,7 @@ namespace Dynarmic::Optimization {
 namespace {
 
 void FlagsPass(IR::Block& block) {
-    using Iterator = std::reverse_iterator<IR::Block::iterator>;
-
+    using Iterator = IR::Block::reverse_iterator;
     struct FlagInfo {
         bool set_not_required = false;
         bool has_value_request = false;
@@ -185,10 +184,10 @@ void RegisterPass(IR::Block& block) {
     using Iterator = IR::Block::iterator;
 
     struct RegInfo {
-        IR::Value register_value;
         std::optional<Iterator> last_set_instruction;
+        IR::Value register_value;
     };
-    std::array<RegInfo, 15> reg_info;
+    alignas(64) std::array<RegInfo, 15> reg_info;
 
     const auto do_get = [](RegInfo& info, Iterator get_inst) {
         if (info.register_value.IsEmpty()) {
@@ -203,12 +202,12 @@ void RegisterPass(IR::Block& block) {
             (*info.last_set_instruction)->Invalidate();
         }
         info = {
-            .register_value = value,
             .last_set_instruction = set_inst,
+            .register_value = value,
         };
     };
 
-    enum class ExtValueType {
+    enum class ExtValueType : std::uint8_t {
         Empty,
         Single,
         Double,
@@ -216,19 +215,20 @@ void RegisterPass(IR::Block& block) {
         VectorQuad,
     };
     struct ExtRegInfo {
-        ExtValueType value_type = {};
         IR::Value register_value;
         std::optional<Iterator> last_set_instruction;
+        ExtValueType value_type = {};
     };
-    std::array<ExtRegInfo, 64> ext_reg_info;
+    // Max returned by RegNumber = 31 (but multiplied by 4 in some cases)
+    alignas(64) std::array<ExtRegInfo, 64> ext_reg_info;
 
     const auto do_ext_get = [](ExtValueType type, std::initializer_list<std::reference_wrapper<ExtRegInfo>> infos, Iterator get_inst) {
         if (!std::all_of(infos.begin(), infos.end(), [type](const auto& info) { return info.get().value_type == type; })) {
             for (auto& info : infos) {
                 info.get() = {
-                    .value_type = type,
                     .register_value = IR::Value(&*get_inst),
                     .last_set_instruction = std::nullopt,
+                    .value_type = type,
                 };
             }
             return;
         }
@@ -244,9 +244,9 @@ void RegisterPass(IR::Block& block) {
         }
         for (auto& info : infos) {
             info.get() = {
-                .value_type = type,
                 .register_value = value,
                 .last_set_instruction = set_inst,
+                .value_type = type,
             };
         }
     };
diff --git a/externals/dynarmic/src/dynarmic/ir/opt/a64_callback_config_pass.cpp b/externals/dynarmic/src/dynarmic/ir/opt/a64_callback_config_pass.cpp
index 79d9769520..36f2a9c495 100644
--- a/externals/dynarmic/src/dynarmic/ir/opt/a64_callback_config_pass.cpp
+++ b/externals/dynarmic/src/dynarmic/ir/opt/a64_callback_config_pass.cpp
@@ -17,7 +17,8 @@ void A64CallbackConfigPass(IR::Block& block, const A64::UserConfig& conf) {
         return;
     }
 
-    for (auto& inst : block) {
+    for (auto iter = block.begin(); iter != block.end(); iter++) {
+        auto& inst = *iter;
         if (inst.GetOpcode() != IR::Opcode::A64DataCacheOperationRaised) {
             continue;
         }
@@ -26,7 +27,7 @@ void A64CallbackConfigPass(IR::Block& block, const A64::UserConfig& conf) {
         if (op == A64::DataCacheOperation::ZeroByVA) {
             A64::IREmitter ir{block};
             ir.current_location = A64::LocationDescriptor{IR::LocationDescriptor{inst.GetArg(0).GetU64()}};
-            ir.SetInsertionPointBefore(&inst);
+            ir.SetInsertionPointBefore(iter);
 
             size_t bytes = 4 << static_cast<size_t>(conf.dczid_el0 & 0b1111);
             IR::U64 addr{inst.GetArg(2)};
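
Note: the RegInfo/ExtRegInfo edits above are layout tweaks: members are reordered (the designated initializers must follow suit, hence the paired hunks), the enum is shrunk to one byte, and both arrays are aligned to 64 bytes, the usual x86-64 cache-line size. A small illustration of what alignas(64) guarantees, assuming that line size:

    #include <array>
    #include <cstdint>

    struct Info {
        std::uint64_t value = 0;
        std::uint8_t kind = 0;  // small trailing member, mirrors ExtValueType
    };

    alignas(64) std::array<Info, 15> table{};
    static_assert(alignof(decltype(table)) == 64, "array starts on a cache-line boundary");

    int main() { return static_cast<int>(table[0].kind); }
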
diff --git a/externals/dynarmic/src/dynarmic/ir/opt/constant_propagation_pass.cpp b/externals/dynarmic/src/dynarmic/ir/opt/constant_propagation_pass.cpp
index 83530fc453..ad40a3367c 100644
--- a/externals/dynarmic/src/dynarmic/ir/opt/constant_propagation_pass.cpp
+++ b/externals/dynarmic/src/dynarmic/ir/opt/constant_propagation_pass.cpp
@@ -22,9 +22,9 @@ using Op = Dynarmic::IR::Opcode;
 
 namespace {
 
-// Tiny helper to avoid the need to store based off the opcode
-// bit size all over the place within folding functions.
-void ReplaceUsesWith(IR::Inst& inst, bool is_32_bit, u64 value) {
+/// Tiny helper to avoid the need to store based off the opcode
+/// bit size all over the place within folding functions.
+static void ReplaceUsesWith(IR::Inst& inst, bool is_32_bit, u64 value) {
     if (is_32_bit) {
         inst.ReplaceUsesWith(IR::Value{static_cast<u32>(value)});
     } else {
@@ -32,12 +32,12 @@ void ReplaceUsesWith(IR::Inst& inst, bool is_32_bit, u64 value) {
     }
 }
 
-IR::Value Value(bool is_32_bit, u64 value) {
+static IR::Value Value(bool is_32_bit, u64 value) {
     return is_32_bit ? IR::Value{static_cast<u32>(value)} : IR::Value{value};
 }
 
 template<typename ImmFn>
-bool FoldCommutative(IR::Inst& inst, bool is_32_bit, ImmFn imm_fn) {
+static bool FoldCommutative(IR::Inst& inst, bool is_32_bit, ImmFn imm_fn) {
     const auto lhs = inst.GetArg(0);
     const auto rhs = inst.GetArg(1);
 
@@ -75,7 +75,7 @@ bool FoldCommutative(IR::Inst& inst, bool is_32_bit, ImmFn imm_fn) {
     return true;
 }
 
-void FoldAdd(IR::Inst& inst, bool is_32_bit) {
+static void FoldAdd(IR::Inst& inst, bool is_32_bit) {
     const auto lhs = inst.GetArg(0);
     const auto rhs = inst.GetArg(1);
     const auto carry = inst.GetArg(2);
@@ -125,7 +125,7 @@ void FoldAdd(IR::Inst& inst, bool is_32_bit) {
 /// 4. x & y -> y (where x has all bits set to 1)
 /// 5. x & y -> x (where y has all bits set to 1)
 ///
-void FoldAND(IR::Inst& inst, bool is_32_bit) {
+static void FoldAND(IR::Inst& inst, bool is_32_bit) {
     if (FoldCommutative(inst, is_32_bit, [](u64 a, u64 b) { return a & b; })) {
         const auto rhs = inst.GetArg(1);
         if (rhs.IsZero()) {
@@ -140,7 +140,7 @@ void FoldAND(IR::Inst& inst, bool is_32_bit) {
 ///
 /// 1. imm -> swap(imm)
 ///
-void FoldByteReverse(IR::Inst& inst, Op op) {
+static void FoldByteReverse(IR::Inst& inst, Op op) {
     const auto operand = inst.GetArg(0);
 
     if (!operand.IsImmediate()) {
@@ -165,7 +165,7 @@ void FoldByteReverse(IR::Inst& inst, Op op) {
 /// 2. imm_x / imm_y -> result
 /// 3. x / 1 -> x
 ///
-void FoldDivide(IR::Inst& inst, bool is_32_bit, bool is_signed) {
+static void FoldDivide(IR::Inst& inst, bool is_32_bit, bool is_signed) {
     const auto rhs = inst.GetArg(1);
 
     if (rhs.IsZero()) {
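
Note: every folding helper above follows the same pattern: when all operands of a pure operation are translation-time constants, evaluate the operation immediately and rewrite every use of the result, truncating to 32 bits for the 32-bit opcode variants. A freestanding sketch of that idea for ADD (simplified, with none of the carry handling the real FoldAdd performs):

    #include <cassert>
    #include <cstdint>

    std::uint64_t fold_add(std::uint64_t lhs, std::uint64_t rhs, bool is_32_bit) {
        const std::uint64_t sum = lhs + rhs;
        return is_32_bit ? static_cast<std::uint32_t>(sum) : sum;  // mirrors ReplaceUsesWith
    }

    int main() {
        assert(fold_add(0xFFFFFFFFu, 1, true) == 0);                // wraps at 32 bits
        assert(fold_add(0xFFFFFFFFu, 1, false) == 0x100000000ull);  // full 64-bit result
    }
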
@@ -193,7 +193,7 @@ void FoldDivide(IR::Inst& inst, bool is_32_bit, bool is_signed) {
 // 2. x ^ 0 -> x
 // 3. 0 ^ y -> y
 //
-void FoldEOR(IR::Inst& inst, bool is_32_bit) {
+static void FoldEOR(IR::Inst& inst, bool is_32_bit) {
     if (FoldCommutative(inst, is_32_bit, [](u64 a, u64 b) { return a ^ b; })) {
         const auto rhs = inst.GetArg(1);
         if (rhs.IsZero()) {
@@ -202,7 +202,7 @@ void FoldEOR(IR::Inst& inst, bool is_32_bit) {
     }
 }
 
-void FoldLeastSignificantByte(IR::Inst& inst) {
+static void FoldLeastSignificantByte(IR::Inst& inst) {
     if (!inst.AreAllArgsImmediates()) {
         return;
     }
@@ -211,7 +211,7 @@ void FoldLeastSignificantByte(IR::Inst& inst) {
     inst.ReplaceUsesWith(IR::Value{static_cast<u8>(operand.GetImmediateAsU64())});
 }
 
-void FoldLeastSignificantHalf(IR::Inst& inst) {
+static void FoldLeastSignificantHalf(IR::Inst& inst) {
     if (!inst.AreAllArgsImmediates()) {
         return;
     }
@@ -220,7 +220,7 @@ void FoldLeastSignificantHalf(IR::Inst& inst) {
     inst.ReplaceUsesWith(IR::Value{static_cast<u16>(operand.GetImmediateAsU64())});
 }
 
-void FoldLeastSignificantWord(IR::Inst& inst) {
+static void FoldLeastSignificantWord(IR::Inst& inst) {
     if (!inst.AreAllArgsImmediates()) {
         return;
     }
@@ -229,7 +229,7 @@ void FoldLeastSignificantWord(IR::Inst& inst) {
     inst.ReplaceUsesWith(IR::Value{static_cast<u32>(operand.GetImmediateAsU64())});
 }
 
-void FoldMostSignificantBit(IR::Inst& inst) {
+static void FoldMostSignificantBit(IR::Inst& inst) {
     if (!inst.AreAllArgsImmediates()) {
         return;
     }
@@ -238,7 +238,7 @@ void FoldMostSignificantBit(IR::Inst& inst) {
     inst.ReplaceUsesWith(IR::Value{(operand.GetImmediateAsU64() >> 31) != 0});
 }
 
-void FoldMostSignificantWord(IR::Inst& inst) {
+static void FoldMostSignificantWord(IR::Inst& inst) {
     IR::Inst* carry_inst = inst.GetAssociatedPseudoOperation(Op::GetCarryFromOp);
 
     if (!inst.AreAllArgsImmediates()) {
@@ -260,7 +260,7 @@ void FoldMostSignificantWord(IR::Inst& inst) {
 // 4. x * 1 -> x
 // 5. 1 * y -> y
 //
-void FoldMultiply(IR::Inst& inst, bool is_32_bit) {
+static void FoldMultiply(IR::Inst& inst, bool is_32_bit) {
     if (FoldCommutative(inst, is_32_bit, [](u64 a, u64 b) { return a * b; })) {
         const auto rhs = inst.GetArg(1);
         if (rhs.IsZero()) {
@@ -272,7 +272,7 @@ void FoldMultiply(IR::Inst& inst, bool is_32_bit) {
 }
 
 // Folds NOT operations if the contained value is an immediate.
-void FoldNOT(IR::Inst& inst, bool is_32_bit) {
+static void FoldNOT(IR::Inst& inst, bool is_32_bit) {
     const auto operand = inst.GetArg(0);
 
     if (!operand.IsImmediate()) {
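
Note: the sign/zero-extension folds that follow reduce to ordinary integer conversions applied to the immediate. The distinction they preserve, in two asserts:

    #include <cassert>
    #include <cstdint>

    int main() {
        const std::uint8_t byte = 0x80;  // -128 when read as signed 8-bit
        // Sign extension replicates the top bit into the widened lanes...
        assert(static_cast<std::uint32_t>(static_cast<std::int8_t>(byte)) == 0xFFFFFF80u);
        // ...zero extension only widens.
        assert(static_cast<std::uint32_t>(byte) == 0x80u);
    }
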
@@ -289,7 +289,7 @@ void FoldNOT(IR::Inst& inst, bool is_32_bit) {
 // 2. x | 0 -> x
 // 3. 0 | y -> y
 //
-void FoldOR(IR::Inst& inst, bool is_32_bit) {
+static void FoldOR(IR::Inst& inst, bool is_32_bit) {
     if (FoldCommutative(inst, is_32_bit, [](u64 a, u64 b) { return a | b; })) {
         const auto rhs = inst.GetArg(1);
         if (rhs.IsZero()) {
@@ -298,7 +298,7 @@ void FoldOR(IR::Inst& inst, bool is_32_bit) {
     }
 }
 
-bool FoldShifts(IR::Inst& inst) {
+static bool FoldShifts(IR::Inst& inst) {
     IR::Inst* carry_inst = inst.GetAssociatedPseudoOperation(Op::GetCarryFromOp);
 
     // The 32-bit variants can contain 3 arguments, while the
@@ -328,7 +328,7 @@ bool FoldShifts(IR::Inst& inst) {
     return true;
 }
 
-void FoldSignExtendXToWord(IR::Inst& inst) {
+static void FoldSignExtendXToWord(IR::Inst& inst) {
     if (!inst.AreAllArgsImmediates()) {
         return;
     }
@@ -337,7 +337,7 @@ void FoldSignExtendXToWord(IR::Inst& inst) {
     inst.ReplaceUsesWith(IR::Value{static_cast<u32>(value)});
 }
 
-void FoldSignExtendXToLong(IR::Inst& inst) {
+static void FoldSignExtendXToLong(IR::Inst& inst) {
    if (!inst.AreAllArgsImmediates()) {
         return;
     }
@@ -346,7 +346,7 @@ void FoldSignExtendXToLong(IR::Inst& inst) {
     inst.ReplaceUsesWith(IR::Value{static_cast<u64>(value)});
 }
 
-void FoldSub(IR::Inst& inst, bool is_32_bit) {
+static void FoldSub(IR::Inst& inst, bool is_32_bit) {
     if (!inst.AreAllArgsImmediates() || inst.HasAssociatedPseudoOperation()) {
         return;
     }
@@ -359,7 +359,7 @@ void FoldSub(IR::Inst& inst, bool is_32_bit) {
     ReplaceUsesWith(inst, is_32_bit, result);
 }
 
-void FoldZeroExtendXToWord(IR::Inst& inst) {
+static void FoldZeroExtendXToWord(IR::Inst& inst) {
     if (!inst.AreAllArgsImmediates()) {
         return;
     }
@@ -368,7 +368,7 @@ void FoldZeroExtendXToWord(IR::Inst& inst) {
     inst.ReplaceUsesWith(IR::Value{static_cast<u32>(value)});
 }
 
-void FoldZeroExtendXToLong(IR::Inst& inst) {
+static void FoldZeroExtendXToLong(IR::Inst& inst) {
     if (!inst.AreAllArgsImmediates()) {
         return;
     }
diff --git a/externals/dynarmic/src/dynarmic/ir/opt/identity_removal_pass.cpp b/externals/dynarmic/src/dynarmic/ir/opt/identity_removal_pass.cpp
index e87fcc335b..a3bab8e37a 100644
--- a/externals/dynarmic/src/dynarmic/ir/opt/identity_removal_pass.cpp
+++ b/externals/dynarmic/src/dynarmic/ir/opt/identity_removal_pass.cpp
@@ -13,11 +13,8 @@ namespace Dynarmic::Optimization {
 
 void IdentityRemovalPass(IR::Block& block) {
     std::vector<IR::Inst*> to_invalidate;
-
-    auto iter = block.begin();
-    while (iter != block.end()) {
+    for (auto iter = block.begin(); iter != block.end(); ) {
         IR::Inst& inst = *iter;
-
         const size_t num_args = inst.NumArgs();
         for (size_t i = 0; i < num_args; i++) {
             while (true) {
@@ -27,18 +24,15 @@ void IdentityRemovalPass(IR::Block& block) {
                 inst.SetArg(i, arg.GetInst()->GetArg(0));
             }
         }
-
         if (inst.GetOpcode() == IR::Opcode::Identity || inst.GetOpcode() == IR::Opcode::Void) {
-            iter = block.Instructions().erase(inst);
+            iter = block.Instructions().erase(iter);
             to_invalidate.push_back(&inst);
         } else {
             ++iter;
         }
     }
-
-    for (IR::Inst* inst : to_invalidate) {
+    for (auto* inst : to_invalidate)
         inst->Invalidate();
-    }
 }
 
 } // namespace Dynarmic::Optimization
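
Note: IdentityRemovalPass works in one forward sweep: each argument is first chased through any chain of Identity nodes to the real producer, then Identity/Void instructions are unlinked and invalidated in bulk. The chasing step, sketched over a toy node type (hypothetical, not dynarmic's IR classes):

    #include <cassert>

    struct Node {
        bool is_identity = false;
        Node* arg = nullptr;  // single argument, enough for the illustration
    };

    // Follow Identity nodes until a real producer is found.
    Node* chase(Node* n) {
        while (n->is_identity)
            n = n->arg;
        return n;
    }

    int main() {
        Node real{false, nullptr};
        Node id1{true, &real};
        Node id2{true, &id1};
        assert(chase(&id2) == &real);  // uses of id2 can be rewired to real
    }
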
diff --git a/externals/dynarmic/src/dynarmic/ir/opt/polyfill_pass.cpp b/externals/dynarmic/src/dynarmic/ir/opt/polyfill_pass.cpp
index 1aa3aea91e..1a28817036 100644
--- a/externals/dynarmic/src/dynarmic/ir/opt/polyfill_pass.cpp
+++ b/externals/dynarmic/src/dynarmic/ir/opt/polyfill_pass.cpp
@@ -13,7 +13,7 @@ namespace Dynarmic::Optimization {
 
 namespace {
 
-void PolyfillSHA256MessageSchedule0(IR::IREmitter& ir, IR::Inst& inst) {
+static void PolyfillSHA256MessageSchedule0(IR::IREmitter& ir, IR::Inst& inst) {
     const IR::U128 x = (IR::U128)inst.GetArg(0);
     const IR::U128 y = (IR::U128)inst.GetArg(1);
 
@@ -37,13 +37,14 @@ void PolyfillSHA256MessageSchedule0(IR::IREmitter& ir, IR::Inst& inst) {
     inst.ReplaceUsesWith(result);
 }
 
-void PolyfillSHA256MessageSchedule1(IR::IREmitter& ir, IR::Inst& inst) {
+static void PolyfillSHA256MessageSchedule1(IR::IREmitter& ir, IR::Inst& inst) {
     const IR::U128 x = (IR::U128)inst.GetArg(0);
     const IR::U128 y = (IR::U128)inst.GetArg(1);
     const IR::U128 z = (IR::U128)inst.GetArg(2);
 
     const IR::U128 T0 = ir.VectorExtract(y, z, 32);
 
+    // TODO: this can use better pipelining m8
     const IR::U128 lower_half = [&] {
         const IR::U128 T = ir.VectorRotateWholeVectorRight(z, 64);
         const IR::U128 tmp1 = ir.VectorRotateRight(32, T, 17);
@@ -73,15 +74,15 @@ void PolyfillSHA256MessageSchedule1(IR::IREmitter& ir, IR::Inst& inst) {
     inst.ReplaceUsesWith(result);
 }
 
-IR::U32 SHAchoose(IR::IREmitter& ir, IR::U32 x, IR::U32 y, IR::U32 z) {
+static IR::U32 SHAchoose(IR::IREmitter& ir, IR::U32 x, IR::U32 y, IR::U32 z) {
     return ir.Eor(ir.And(ir.Eor(y, z), x), z);
 }
 
-IR::U32 SHAmajority(IR::IREmitter& ir, IR::U32 x, IR::U32 y, IR::U32 z) {
+static IR::U32 SHAmajority(IR::IREmitter& ir, IR::U32 x, IR::U32 y, IR::U32 z) {
     return ir.Or(ir.And(x, y), ir.And(ir.Or(x, y), z));
 }
 
-IR::U32 SHAhashSIGMA0(IR::IREmitter& ir, IR::U32 x) {
+static IR::U32 SHAhashSIGMA0(IR::IREmitter& ir, IR::U32 x) {
     const IR::U32 tmp1 = ir.RotateRight(x, ir.Imm8(2));
     const IR::U32 tmp2 = ir.RotateRight(x, ir.Imm8(13));
     const IR::U32 tmp3 = ir.RotateRight(x, ir.Imm8(22));
@@ -89,7 +90,7 @@ IR::U32 SHAhashSIGMA0(IR::IREmitter& ir, IR::U32 x) {
     return ir.Eor(tmp1, ir.Eor(tmp2, tmp3));
 }
 
-IR::U32 SHAhashSIGMA1(IR::IREmitter& ir, IR::U32 x) {
+static IR::U32 SHAhashSIGMA1(IR::IREmitter& ir, IR::U32 x) {
     const IR::U32 tmp1 = ir.RotateRight(x, ir.Imm8(6));
     const IR::U32 tmp2 = ir.RotateRight(x, ir.Imm8(11));
     const IR::U32 tmp3 = ir.RotateRight(x, ir.Imm8(25));
@@ -97,7 +98,7 @@ IR::U32 SHAhashSIGMA1(IR::IREmitter& ir, IR::U32 x) {
     return ir.Eor(tmp1, ir.Eor(tmp2, tmp3));
 }
 
-void PolyfillSHA256Hash(IR::IREmitter& ir, IR::Inst& inst) {
+static void PolyfillSHA256Hash(IR::IREmitter& ir, IR::Inst& inst) {
     IR::U128 x = (IR::U128)inst.GetArg(0);
     IR::U128 y = (IR::U128)inst.GetArg(1);
     const IR::U128 w = (IR::U128)inst.GetArg(2);
@@ -139,7 +140,7 @@ void PolyfillSHA256Hash(IR::IREmitter& ir, IR::Inst& inst) {
 }
 
 template<size_t esize, bool is_signed>
-void PolyfillVectorMultiplyWiden(IR::IREmitter& ir, IR::Inst& inst) {
+static void PolyfillVectorMultiplyWiden(IR::IREmitter& ir, IR::Inst& inst) {
     IR::U128 n = (IR::U128)inst.GetArg(0);
     IR::U128 m = (IR::U128)inst.GetArg(1);
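
Note: SHAchoose and SHAmajority above are the FIPS 180-4 Ch and Maj functions in fused form, Ch(x,y,z) = ((y ^ z) & x) ^ z and Maj(x,y,z) = (x & y) | ((x | y) & z), each one operation cheaper than the textbook definitions. A quick exhaustive check of the equivalence (the functions are bitwise, so all-zero/all-one inputs cover every per-bit case):

    #include <cassert>
    #include <cstdint>

    std::uint32_t ch(std::uint32_t x, std::uint32_t y, std::uint32_t z)  { return ((y ^ z) & x) ^ z; }
    std::uint32_t maj(std::uint32_t x, std::uint32_t y, std::uint32_t z) { return (x & y) | ((x | y) & z); }

    int main() {
        for (std::uint32_t x : {0u, ~0u})
            for (std::uint32_t y : {0u, ~0u})
                for (std::uint32_t z : {0u, ~0u}) {
                    assert(ch(x, y, z)  == ((x & y) ^ (~x & z)));           // textbook Ch
                    assert(maj(x, y, z) == ((x & y) ^ (x & z) ^ (y & z)));  // textbook Maj
                }
    }
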
@@ -159,54 +160,52 @@ void PolyfillPass(IR::Block& block, const PolyfillOptions& polyfill) {
     }
 
     IR::IREmitter ir{block};
-
-    for (auto& inst : block) {
-        ir.SetInsertionPointBefore(&inst);
-
-        switch (inst.GetOpcode()) {
+    for (auto iter = block.begin(); iter != block.end(); iter++) {
+        ir.SetInsertionPointBefore(iter);
+        switch (iter->GetOpcode()) {
         case IR::Opcode::SHA256MessageSchedule0:
             if (polyfill.sha256) {
-                PolyfillSHA256MessageSchedule0(ir, inst);
+                PolyfillSHA256MessageSchedule0(ir, *iter);
             }
             break;
         case IR::Opcode::SHA256MessageSchedule1:
             if (polyfill.sha256) {
-                PolyfillSHA256MessageSchedule1(ir, inst);
+                PolyfillSHA256MessageSchedule1(ir, *iter);
             }
             break;
         case IR::Opcode::SHA256Hash:
             if (polyfill.sha256) {
-                PolyfillSHA256Hash(ir, inst);
+                PolyfillSHA256Hash(ir, *iter);
             }
             break;
         case IR::Opcode::VectorMultiplySignedWiden8:
             if (polyfill.vector_multiply_widen) {
-                PolyfillVectorMultiplyWiden<8, true>(ir, inst);
+                PolyfillVectorMultiplyWiden<8, true>(ir, *iter);
             }
             break;
         case IR::Opcode::VectorMultiplySignedWiden16:
             if (polyfill.vector_multiply_widen) {
-                PolyfillVectorMultiplyWiden<16, true>(ir, inst);
+                PolyfillVectorMultiplyWiden<16, true>(ir, *iter);
             }
             break;
        case IR::Opcode::VectorMultiplySignedWiden32:
             if (polyfill.vector_multiply_widen) {
-                PolyfillVectorMultiplyWiden<32, true>(ir, inst);
+                PolyfillVectorMultiplyWiden<32, true>(ir, *iter);
             }
             break;
         case IR::Opcode::VectorMultiplyUnsignedWiden8:
             if (polyfill.vector_multiply_widen) {
-                PolyfillVectorMultiplyWiden<8, false>(ir, inst);
+                PolyfillVectorMultiplyWiden<8, false>(ir, *iter);
             }
             break;
         case IR::Opcode::VectorMultiplyUnsignedWiden16:
             if (polyfill.vector_multiply_widen) {
-                PolyfillVectorMultiplyWiden<16, false>(ir, inst);
+                PolyfillVectorMultiplyWiden<16, false>(ir, *iter);
             }
             break;
         case IR::Opcode::VectorMultiplyUnsignedWiden32:
             if (polyfill.vector_multiply_widen) {
-                PolyfillVectorMultiplyWiden<32, false>(ir, inst);
+                PolyfillVectorMultiplyWiden<32, false>(ir, *iter);
             }
             break;
         default: