[dynarmic] remove memory pool (standard malloc does a better job)

Author: lizzie (2025-07-19 00:51:47 +01:00), committed by crueter
parent fc6f9de3fa
commit ab631e6b28
16 changed files with 85 additions and 264 deletions
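The pattern being reversed, in a minimal sketch (simplified stand-in types, not dynarmic's own): previously each IR instruction was constructed with placement-new into slab memory handed out by a custom pool; after this commit it is an ordinary `new`/`delete` heap object, on the theory that a modern general-purpose malloc handles small fixed-size allocations at least as well.

```cpp
#include <cstdlib>
#include <new>

// Simplified stand-in; not dynarmic's real IR::Inst.
struct Inst {
    explicit Inst(int op) : opcode(op) {}
    int opcode;
};

int main() {
    // Before: bump-allocate raw storage from a pool-owned slab and
    // construct in place. No per-object free ever happens.
    char* slab = static_cast<char*>(std::malloc(sizeof(Inst) * 16));
    Inst* pooled = new (slab) Inst(1);  // placement-new
    pooled->~Inst();                    // destroyed manually (or not at all)...
    std::free(slab);                    // ...the slab is released wholesale

    // After: the general-purpose allocator does the bookkeeping.
    Inst* heap = new Inst(1);
    delete heap;
    return 0;
}
```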

src/dynarmic/CMakeLists.txt

@@ -53,8 +53,6 @@ add_library(dynarmic
     common/lut_from_list.h
     common/math_util.cpp
     common/math_util.h
-    common/memory_pool.cpp
-    common/memory_pool.h
     common/safe_ops.h
     common/spin_lock.h
     common/string_util.h
@@ -153,6 +151,7 @@ if ("A64" IN_LIST DYNARMIC_FRONTENDS)
         ir/opt/a64_callback_config_pass.cpp
         ir/opt/a64_get_set_elimination_pass.cpp
         ir/opt/a64_merge_interpret_blocks.cpp
+        ir/opt/x64_peepholes.cpp
     )
 endif()

src/dynarmic/backend/x64/emit_x64.cpp

@@ -35,11 +35,6 @@ EmitContext::EmitContext(RegAlloc& reg_alloc, IR::Block& block)
 EmitContext::~EmitContext() = default;
 
-void EmitContext::EraseInstruction(IR::Inst* inst) {
-    block.Instructions().erase(inst);
-    inst->ClearArgs();
-}
-
 EmitX64::EmitX64(BlockOfCode& code)
     : code(code) {
     exception_handler.Register(code);

src/dynarmic/backend/x64/emit_x64.h

@@ -54,10 +54,7 @@ struct EmitContext {
     EmitContext(RegAlloc& reg_alloc, IR::Block& block);
     virtual ~EmitContext();
 
-    void EraseInstruction(IR::Inst* inst);
-
     virtual FP::FPCR FPCR(bool fpcr_controlled = true) const = 0;
     virtual bool HasOptimization(OptimizationFlag flag) const = 0;
 
     RegAlloc& reg_alloc;

src/dynarmic/backend/x64/emit_x64_crc32.cpp

@@ -40,7 +40,6 @@ static void EmitCRC32Castagnoli(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, const int data_size) {
 static void EmitCRC32ISO(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, const int data_size) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-
     if (code.HasHostFeature(HostFeature::PCLMULQDQ) && data_size < 32) {
         const Xbyak::Reg32 crc = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
         const Xbyak::Reg64 value = ctx.reg_alloc.UseScratchGpr(args[1]);
@@ -69,10 +68,7 @@ static void EmitCRC32ISO(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, const int data_size) {
         code.pextrd(crc, xmm_value, 2);
         ctx.reg_alloc.DefineValue(inst, crc);
-        return;
-    }
-
-    if (code.HasHostFeature(HostFeature::PCLMULQDQ) && data_size == 32) {
+    } else if (code.HasHostFeature(HostFeature::PCLMULQDQ) && data_size == 32) {
         const Xbyak::Reg32 crc = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
         const Xbyak::Reg32 value = ctx.reg_alloc.UseGpr(args[1]).cvt32();
         const Xbyak::Xmm xmm_value = ctx.reg_alloc.ScratchXmm();
@@ -90,10 +86,7 @@ static void EmitCRC32ISO(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, const int data_size) {
         code.pextrd(crc, xmm_value, 2);
         ctx.reg_alloc.DefineValue(inst, crc);
-        return;
-    }
-
-    if (code.HasHostFeature(HostFeature::PCLMULQDQ) && data_size == 64) {
+    } else if (code.HasHostFeature(HostFeature::PCLMULQDQ) && data_size == 64) {
         const Xbyak::Reg32 crc = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
         const Xbyak::Reg64 value = ctx.reg_alloc.UseGpr(args[1]);
         const Xbyak::Xmm xmm_value = ctx.reg_alloc.ScratchXmm();
@@ -111,12 +104,11 @@ static void EmitCRC32ISO(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, const int data_size) {
         code.pextrd(crc, xmm_value, 2);
         ctx.reg_alloc.DefineValue(inst, crc);
-        return;
-    }
-
+    } else {
         ctx.reg_alloc.HostCall(inst, args[0], args[1], {});
         code.mov(code.ABI_PARAM3, data_size / CHAR_BIT);
         code.CallFunction(&CRC32::ComputeCRC32ISO);
+    }
 }
 
 void EmitX64::EmitCRC32Castagnoli8(EmitContext& ctx, IR::Inst* inst) {

src/dynarmic/backend/x64/emit_x64_floating_point.cpp

@@ -236,22 +236,18 @@ void DenormalsAreZero(BlockOfCode& code, FP::FPCR fpcr, std::initializer_list<Xbyak::Xmm> to_daz, Xbyak::Xmm tmp) {
             FpFixup::Norm_Src,
             FpFixup::Norm_Src,
             FpFixup::Norm_Src,
-            FpFixup::Norm_Src);
+            FpFixup::Norm_Src
+        );
         FCODE(vmovap)(tmp, code.BConst<fsize>(xword, denormal_to_zero));
-        for (const Xbyak::Xmm& xmm : to_daz) {
+        for (const Xbyak::Xmm& xmm : to_daz)
             FCODE(vfixupimmp)(xmm, xmm, tmp, u8(0));
-        }
-        return;
-    }
-
-    if (fpcr.RMode() != FP::RoundingMode::TowardsMinusInfinity) {
-        code.movaps(tmp, GetNegativeZeroVector<fsize>(code));
-    } else {
-        code.xorps(tmp, tmp);
-    }
-    for (const Xbyak::Xmm& xmm : to_daz) {
-        FCODE(addp)(xmm, tmp);
-    }
+    } else {
+        if (fpcr.RMode() != FP::RoundingMode::TowardsMinusInfinity) {
+            code.movaps(tmp, GetNegativeZeroVector<fsize>(code));
+        } else {
+            code.xorps(tmp, tmp);
+        }
+        for (const Xbyak::Xmm& xmm : to_daz)
+            FCODE(addp)(xmm, tmp);
+    }
 }
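For context, `DenormalsAreZero` emits vector code that flushes subnormal inputs to a signed zero before the operation (via `vfixupimm` on AVX-512, otherwise via the zero-add path above). A scalar sketch of the per-lane behaviour, illustrative only:

```cpp
#include <cfloat>
#include <cmath>
#include <cstdio>

// Flush subnormal inputs to zero, preserving sign; normals pass through.
static float daz(float x) {
    return (x != 0.0f && std::fabs(x) < FLT_MIN) ? std::copysign(0.0f, x) : x;
}

int main() {
    std::printf("%g\n", daz(1e-44f));   // subnormal -> 0
    std::printf("%g\n", daz(-1e-44f));  // subnormal -> -0
    std::printf("%g\n", daz(1.5f));     // normal, unchanged
    return 0;
}
```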

src/dynarmic/common/memory_pool.cpp (deleted)

@@ -1,13 +0,0 @@
-/* This file is part of the dynarmic project.
- * Copyright (c) 2016 MerryMage
- * SPDX-License-Identifier: 0BSD
- */
-
-#include "dynarmic/common/memory_pool.h"
-
-#include <cstdlib>
-
-namespace Dynarmic::Common {
-
-} // namespace Dynarmic::Common

src/dynarmic/common/memory_pool.h (deleted)

@@ -1,61 +0,0 @@
-/* This file is part of the dynarmic project.
- * Copyright (c) 2016 MerryMage
- * SPDX-License-Identifier: 0BSD
- */
-
-#pragma once
-
-#include <cstddef>
-#include <vector>
-
-namespace Dynarmic::Common {
-
-/// @tparam object_size Byte-size of objects to construct
-/// @tparam slab_size Number of objects to have per slab
-template<size_t object_size, size_t slab_size>
-class Pool {
-public:
-    inline Pool() noexcept {
-        AllocateNewSlab();
-    }
-    inline ~Pool() noexcept {
-        std::free(current_slab);
-        for (char* slab : slabs) {
-            std::free(slab);
-        }
-    }
-
-    Pool(const Pool&) = delete;
-    Pool(Pool&&) = delete;
-
-    Pool& operator=(const Pool&) = delete;
-    Pool& operator=(Pool&&) = delete;
-
-    /// @brief Returns a pointer to an `object_size`-bytes block of memory.
-    [[nodiscard]] void* Alloc() noexcept {
-        if (remaining == 0) {
-            slabs.push_back(current_slab);
-            AllocateNewSlab();
-        }
-        void* ret = static_cast<void*>(current_ptr);
-        current_ptr += object_size;
-        remaining--;
-        return ret;
-    }
-
-private:
-    /// @brief Allocates a completely new memory slab.
-    /// Used when an entirely new slab is needed
-    /// due the current one running out of usable space.
-    void AllocateNewSlab() noexcept {
-        current_slab = static_cast<char*>(std::malloc(object_size * slab_size));
-        current_ptr = current_slab;
-        remaining = slab_size;
-    }
-
-    std::vector<char*> slabs;
-    char* current_slab = nullptr;
-    char* current_ptr = nullptr;
-    size_t remaining = 0;
-};
-
-} // namespace Dynarmic::Common
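For reference, the way a block drew from this pool while it existed (the real call site appears in basic_block.cpp below; `Inst` here is a simplified stand-in, and the include assumes the header just deleted): `Alloc()` is a pointer bump, objects are constructed in place, and nothing is reclaimed until the pool itself is destroyed.

```cpp
#include <new>

#include "dynarmic/common/memory_pool.h"  // the header deleted above

struct Inst { int opcode; };  // simplified stand-in for IR::Inst

int main() {
    // Mirrors the Block member below: slabs sized at 2 MiB worth of Inst.
    Dynarmic::Common::Pool<sizeof(Inst), 2097152UL / sizeof(Inst)> pool;

    void* storage = pool.Alloc();         // O(1): bump current_ptr
    Inst* inst = new (storage) Inst{42};  // construct in place
    (void)inst;

    // There is no Free() and destructors are never run: slabs are released
    // wholesale in ~Pool(), so only trivially-destructible objects are safe.
    return 0;
}
```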

src/dynarmic/ir/basic_block.cpp

@@ -13,7 +13,6 @@
 #include <fmt/format.h>
 #include <mcl/assert.hpp>
 
-#include "dynarmic/common/memory_pool.h"
 #include "dynarmic/frontend/A32/a32_types.h"
 #include "dynarmic/frontend/A64/a64_types.h"
 #include "dynarmic/ir/cond.h"
@@ -24,8 +23,7 @@ namespace Dynarmic::IR {
 Block::Block(const LocationDescriptor& location)
         : location{location},
           end_location{location},
-          cond{Cond::AL},
-          instruction_alloc_pool{std::make_unique<std::remove_reference_t<decltype(*instruction_alloc_pool)>>()}
+          cond{Cond::AL}
 {
 }
 
@@ -37,7 +35,7 @@ Block::Block(const LocationDescriptor& location)
 /// @param args A sequence of Value instances used as arguments for the instruction.
 /// @returns Iterator to the newly created instruction.
 Block::iterator Block::PrependNewInst(iterator insertion_point, Opcode opcode, std::initializer_list<Value> args) noexcept {
-    IR::Inst* inst = new (instruction_alloc_pool->Alloc()) IR::Inst(opcode);
+    IR::Inst* inst = new IR::Inst(opcode);
     DEBUG_ASSERT(args.size() == inst->NumArgs());
 
     std::for_each(args.begin(), args.end(), [&inst, index = size_t(0)](const auto& arg) mutable {
         inst->SetArg(index, arg);
@@ -83,9 +81,7 @@ static std::string TerminalToString(const Terminal& terminal_variant) noexcept {
 }
 
 std::string DumpBlock(const IR::Block& block) noexcept {
-    std::string ret;
-    ret += fmt::format("Block: location={}\n", block.Location());
+    std::string ret = fmt::format("Block: location={}\n", block.Location());
     ret += fmt::format("cycles={}", block.CycleCount());
     ret += fmt::format(", entry_cond={}", A64::CondToString(block.GetCondition()));
     if (block.GetCondition() != Cond::AL) {
@@ -113,6 +109,8 @@ std::string DumpBlock(const IR::Block& block) noexcept {
             return fmt::format("#{:#x}", arg.GetU32());
         case Type::U64:
             return fmt::format("#{:#x}", arg.GetU64());
+        case Type::U128:
+            return fmt::format("#<u128>");
         case Type::A32Reg:
             return A32::RegToString(arg.GetA32RegRef());
         case Type::A32ExtReg:
@@ -155,14 +153,9 @@ std::string DumpBlock(const IR::Block& block) noexcept {
                 ret += fmt::format("<type error: {} != {}>", GetNameOf(actual_type), GetNameOf(expected_type));
             }
         }
-        ret += fmt::format(" (uses: {})", inst.UseCount());
-        ret += '\n';
+        ret += fmt::format(" (uses: {})\n", inst.UseCount());
     }
 
     ret += "terminal = " + TerminalToString(block.GetTerminal()) + '\n';
 
     return ret;
 }
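One consequence of this hunk (my reading; the commit does not spell it out): pooled instructions were reclaimed wholesale when the pool died, so `PrependNewInst` needed no matching free, whereas plain `new` requires every instruction to be deleted individually when the block tears down. The ownership shape, in a simplified sketch that is not dynarmic's actual container:

```cpp
#include <list>

struct Inst {
    explicit Inst(int op) : opcode(op) {}
    int opcode;
};

// Simplified stand-in for a block that owns heap-allocated instructions.
struct Block {
    std::list<Inst*> instructions;

    Inst* prepend(int op) {
        Inst* inst = new Inst(op);  // was: new (instruction_alloc_pool->Alloc()) Inst(op)
        instructions.push_front(inst);
        return inst;
    }

    ~Block() {
        for (Inst* inst : instructions)
            delete inst;  // previously implicit in freeing the pool's slabs
    }
};

int main() {
    Block b;
    b.prepend(7);
    return 0;
}
```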

src/dynarmic/ir/basic_block.h

@@ -17,8 +17,6 @@
 #include "dynarmic/ir/microinstruction.h"
 #include "dynarmic/ir/terminal.h"
 #include "dynarmic/ir/value.h"
-#include "dynarmic/ir/dense_list.h"
-#include "dynarmic/common/memory_pool.h"
 
 namespace Dynarmic::IR {
 
@@ -76,7 +74,7 @@ public:
     /// @param op Opcode representing the instruction to add.
     /// @param args A sequence of Value instances used as arguments for the instruction.
     inline void AppendNewInst(const Opcode opcode, const std::initializer_list<IR::Value> args) noexcept {
-        PrependNewInst(end(), opcode, args);
+        PrependNewInst(instructions.end(), opcode, args);
     }
 
     iterator PrependNewInst(iterator insertion_point, Opcode op, std::initializer_list<Value> args) noexcept;
@@ -171,8 +169,6 @@ private:
     LocationDescriptor end_location;
     /// Conditional to pass in order to execute this block
     Cond cond;
-    /// Memory pool for instruction list
-    std::unique_ptr<Common::Pool<sizeof(Inst), 2097152UL / sizeof(Inst)>> instruction_alloc_pool;
     /// Terminal instruction of this block.
     Terminal terminal = Term::Invalid{};
     /// Number of cycles this block takes to execute if the conditional fails.

src/dynarmic/ir/dense_list.h (deleted)

@@ -1,58 +0,0 @@
-#pragma once
-
-#include <cstdint>
-#include <cstddef>
-#include <deque>
-
-namespace Dynarmic {
-
-template<typename T> struct dense_list {
-    using difference_type = std::ptrdiff_t;
-    using size_type = std::size_t;
-    using value_type = T;
-    using pointer = value_type*;
-    using const_pointer = const value_type*;
-    using reference = value_type&;
-    using const_reference = const value_type&;
-    using iterator = std::deque<value_type>::iterator;
-    using const_iterator = std::deque<value_type>::const_iterator;
-    using reverse_iterator = std::reverse_iterator<iterator>;
-    using const_reverse_iterator = std::reverse_iterator<const_iterator>;
-
-    inline bool empty() const noexcept { return list.empty(); }
-    inline size_type size() const noexcept { return list.size(); }
-
-    inline value_type& front() noexcept { return list.front(); }
-    inline const value_type& front() const noexcept { return list.front(); }
-    inline value_type& back() noexcept { return list.back(); }
-    inline const value_type& back() const noexcept { return list.back(); }
-
-    inline iterator begin() noexcept { return list.begin(); }
-    inline const_iterator begin() const noexcept { return list.begin(); }
-    inline iterator end() noexcept { return list.end(); }
-    inline const_iterator end() const noexcept { return list.end(); }
-    inline reverse_iterator rbegin() noexcept { return list.rbegin(); }
-    inline const_reverse_iterator rbegin() const noexcept { return list.rbegin(); }
-    inline reverse_iterator rend() noexcept { return list.rend(); }
-    inline const_reverse_iterator rend() const noexcept { return list.rend(); }
-    inline const_iterator cbegin() const noexcept { return list.cbegin(); }
-    inline const_iterator cend() const noexcept { return list.cend(); }
-    inline const_reverse_iterator crbegin() const noexcept { return list.crbegin(); }
-    inline const_reverse_iterator crend() const noexcept { return list.crend(); }
-
-    inline iterator insert_before(iterator it, value_type& value) noexcept {
-        if (it == list.begin()) {
-            list.push_front(value);
-            return list.begin();
-        }
-        auto const index = std::distance(list.begin(), it - 1);
-        list.insert(it - 1, value);
-        return list.begin() + index;
-    }
-
-    std::deque<value_type> list;
-};
-
-}
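A note on the deleted wrapper: `dense_list` stores elements by value in a `std::deque` behind a list-like interface. When reading `insert_before`, keep in mind a standard `std::deque` guarantee (stated here as background, not as the motive for removal): insertion at either end invalidates every outstanding iterator, while references to existing elements remain valid.

```cpp
#include <deque>

int main() {
    std::deque<int> d = {1, 2, 3};
    auto it = d.begin() + 1;  // refers to the element 2
    int& ref = d[1];

    d.push_front(0);  // invalidates ALL iterators into d; references survive

    // *it here would be undefined behaviour; re-acquire the position:
    it = d.begin() + 2;
    return (*it == 2 && ref == 2) ? 0 : 1;
}
```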

src/dynarmic/ir/ir_emitter.h

@@ -2947,19 +2947,10 @@ public:
         block.SetTerminal(terminal);
     }
 
-    void SetInsertionPointBefore(IR::Inst* new_insertion_point) {
-        insertion_point = IR::Block::iterator{*new_insertion_point};
-    }
-
     void SetInsertionPointBefore(IR::Block::iterator new_insertion_point) {
         insertion_point = new_insertion_point;
     }
 
-    void SetInsertionPointAfter(IR::Inst* new_insertion_point) {
-        insertion_point = IR::Block::iterator{*new_insertion_point};
-        ++insertion_point;
-    }
-
     void SetInsertionPointAfter(IR::Block::iterator new_insertion_point) {
         insertion_point = new_insertion_point;
         ++insertion_point;

src/dynarmic/ir/opt/a32_get_set_elimination_pass.cpp

@@ -22,8 +22,7 @@ namespace Dynarmic::Optimization {
 namespace {
 
 void FlagsPass(IR::Block& block) {
-    using Iterator = std::reverse_iterator<IR::Block::iterator>;
-
+    using Iterator = IR::Block::reverse_iterator;
     struct FlagInfo {
         bool set_not_required = false;
         bool has_value_request = false;
@@ -185,10 +184,10 @@ void RegisterPass(IR::Block& block) {
     using Iterator = IR::Block::iterator;
 
     struct RegInfo {
-        IR::Value register_value;
         std::optional<Iterator> last_set_instruction;
+        IR::Value register_value;
     };
-    std::array<RegInfo, 15> reg_info;
+    alignas(64) std::array<RegInfo, 15> reg_info;
 
     const auto do_get = [](RegInfo& info, Iterator get_inst) {
         if (info.register_value.IsEmpty()) {
@@ -203,12 +202,12 @@ void RegisterPass(IR::Block& block) {
             (*info.last_set_instruction)->Invalidate();
         }
         info = {
-            .register_value = value,
             .last_set_instruction = set_inst,
+            .register_value = value,
         };
     };
 
-    enum class ExtValueType {
+    enum class ExtValueType : std::uint8_t {
         Empty,
         Single,
         Double,
@@ -216,19 +215,20 @@ void RegisterPass(IR::Block& block) {
         VectorQuad,
     };
     struct ExtRegInfo {
-        ExtValueType value_type = {};
         IR::Value register_value;
         std::optional<Iterator> last_set_instruction;
+        ExtValueType value_type = {};
     };
-    std::array<ExtRegInfo, 64> ext_reg_info;
+    // Max returned by RegNumber = 31 (but multiplied by 4 in some cases)
+    alignas(64) std::array<ExtRegInfo, 128> ext_reg_info;
 
     const auto do_ext_get = [](ExtValueType type, std::initializer_list<std::reference_wrapper<ExtRegInfo>> infos, Iterator get_inst) {
         if (!std::all_of(infos.begin(), infos.end(), [type](const auto& info) { return info.get().value_type == type; })) {
             for (auto& info : infos) {
                 info.get() = {
-                    .value_type = type,
                     .register_value = IR::Value(&*get_inst),
                     .last_set_instruction = std::nullopt,
+                    .value_type = type,
                 };
             }
             return;
@@ -244,9 +244,9 @@ void RegisterPass(IR::Block& block) {
         }
         for (auto& info : infos) {
             info.get() = {
-                .value_type = type,
                 .register_value = value,
                 .last_set_instruction = set_inst,
+                .value_type = type,
             };
         }
     };
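The struct edits above follow a common layout recipe (the intent is inferred; the commit doesn't say): give the enum a one-byte underlying type, move the small member to the end, and cache-align the hot arrays. A compilable illustration of the two guaranteed effects, with simplified stand-in types:

```cpp
#include <cassert>
#include <cstdint>
#include <optional>

enum class ExtValueType : std::uint8_t { Empty, Single, Double };
static_assert(sizeof(ExtValueType) == 1);  // narrowed underlying type

struct ExtRegInfo {
    void* register_value;                 // placeholder for IR::Value
    std::optional<int> last_set_instruction;
    ExtValueType value_type = {};         // one byte, placed last
};

int main() {
    alignas(64) ExtRegInfo infos[128] = {};
    // alignas(64) starts the array on a cache-line boundary.
    assert(reinterpret_cast<std::uintptr_t>(&infos[0]) % 64 == 0);
    return 0;
}
```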

src/dynarmic/ir/opt/a64_callback_config_pass.cpp

@@ -17,7 +17,8 @@ void A64CallbackConfigPass(IR::Block& block, const A64::UserConfig& conf) {
         return;
     }
 
-    for (auto& inst : block) {
+    for (auto iter = block.begin(); iter != block.end(); iter++) {
+        auto& inst = *iter;
         if (inst.GetOpcode() != IR::Opcode::A64DataCacheOperationRaised) {
             continue;
         }
@@ -26,7 +27,7 @@ void A64CallbackConfigPass(IR::Block& block, const A64::UserConfig& conf) {
         if (op == A64::DataCacheOperation::ZeroByVA) {
             A64::IREmitter ir{block};
             ir.current_location = A64::LocationDescriptor{IR::LocationDescriptor{inst.GetArg(0).GetU64()}};
-            ir.SetInsertionPointBefore(&inst);
+            ir.SetInsertionPointBefore(iter);
 
             size_t bytes = 4 << static_cast<size_t>(conf.dczid_el0 & 0b1111);
             IR::U64 addr{inst.GetArg(2)};

src/dynarmic/ir/opt/constant_propagation_pass.cpp

@@ -22,9 +22,9 @@ using Op = Dynarmic::IR::Opcode;
 
 namespace {
 
-// Tiny helper to avoid the need to store based off the opcode
-// bit size all over the place within folding functions.
-void ReplaceUsesWith(IR::Inst& inst, bool is_32_bit, u64 value) {
+/// Tiny helper to avoid the need to store based off the opcode
+/// bit size all over the place within folding functions.
+static void ReplaceUsesWith(IR::Inst& inst, bool is_32_bit, u64 value) {
     if (is_32_bit) {
         inst.ReplaceUsesWith(IR::Value{static_cast<u32>(value)});
     } else {
@@ -32,12 +32,12 @@ void ReplaceUsesWith(IR::Inst& inst, bool is_32_bit, u64 value) {
     }
 }
 
-IR::Value Value(bool is_32_bit, u64 value) {
+static IR::Value Value(bool is_32_bit, u64 value) {
     return is_32_bit ? IR::Value{static_cast<u32>(value)} : IR::Value{value};
 }
 
 template<typename ImmFn>
-bool FoldCommutative(IR::Inst& inst, bool is_32_bit, ImmFn imm_fn) {
+static bool FoldCommutative(IR::Inst& inst, bool is_32_bit, ImmFn imm_fn) {
     const auto lhs = inst.GetArg(0);
     const auto rhs = inst.GetArg(1);
 
@@ -75,7 +75,7 @@ bool FoldCommutative(IR::Inst& inst, bool is_32_bit, ImmFn imm_fn) {
     return true;
 }
 
-void FoldAdd(IR::Inst& inst, bool is_32_bit) {
+static void FoldAdd(IR::Inst& inst, bool is_32_bit) {
     const auto lhs = inst.GetArg(0);
     const auto rhs = inst.GetArg(1);
     const auto carry = inst.GetArg(2);
@@ -125,7 +125,7 @@ void FoldAdd(IR::Inst& inst, bool is_32_bit) {
 /// 4. x & y -> y (where x has all bits set to 1)
 /// 5. x & y -> x (where y has all bits set to 1)
 ///
-void FoldAND(IR::Inst& inst, bool is_32_bit) {
+static void FoldAND(IR::Inst& inst, bool is_32_bit) {
     if (FoldCommutative(inst, is_32_bit, [](u64 a, u64 b) { return a & b; })) {
         const auto rhs = inst.GetArg(1);
         if (rhs.IsZero()) {
@@ -140,7 +140,7 @@ void FoldAND(IR::Inst& inst, bool is_32_bit) {
 ///
 /// 1. imm -> swap(imm)
 ///
-void FoldByteReverse(IR::Inst& inst, Op op) {
+static void FoldByteReverse(IR::Inst& inst, Op op) {
     const auto operand = inst.GetArg(0);
 
     if (!operand.IsImmediate()) {
@@ -165,7 +165,7 @@ void FoldByteReverse(IR::Inst& inst, Op op) {
 /// 2. imm_x / imm_y -> result
 /// 3. x / 1 -> x
 ///
-void FoldDivide(IR::Inst& inst, bool is_32_bit, bool is_signed) {
+static void FoldDivide(IR::Inst& inst, bool is_32_bit, bool is_signed) {
     const auto rhs = inst.GetArg(1);
 
     if (rhs.IsZero()) {
@@ -193,7 +193,7 @@ void FoldDivide(IR::Inst& inst, bool is_32_bit, bool is_signed) {
 // 2. x ^ 0 -> x
 // 3. 0 ^ y -> y
 //
-void FoldEOR(IR::Inst& inst, bool is_32_bit) {
+static void FoldEOR(IR::Inst& inst, bool is_32_bit) {
     if (FoldCommutative(inst, is_32_bit, [](u64 a, u64 b) { return a ^ b; })) {
         const auto rhs = inst.GetArg(1);
         if (rhs.IsZero()) {
@@ -202,7 +202,7 @@ void FoldEOR(IR::Inst& inst, bool is_32_bit) {
     }
 }
 
-void FoldLeastSignificantByte(IR::Inst& inst) {
+static void FoldLeastSignificantByte(IR::Inst& inst) {
     if (!inst.AreAllArgsImmediates()) {
         return;
     }
@@ -211,7 +211,7 @@ void FoldLeastSignificantByte(IR::Inst& inst) {
     inst.ReplaceUsesWith(IR::Value{static_cast<u8>(operand.GetImmediateAsU64())});
 }
 
-void FoldLeastSignificantHalf(IR::Inst& inst) {
+static void FoldLeastSignificantHalf(IR::Inst& inst) {
    if (!inst.AreAllArgsImmediates()) {
        return;
    }
@@ -220,7 +220,7 @@ void FoldLeastSignificantHalf(IR::Inst& inst) {
     inst.ReplaceUsesWith(IR::Value{static_cast<u16>(operand.GetImmediateAsU64())});
 }
 
-void FoldLeastSignificantWord(IR::Inst& inst) {
+static void FoldLeastSignificantWord(IR::Inst& inst) {
     if (!inst.AreAllArgsImmediates()) {
         return;
     }
@@ -229,7 +229,7 @@ void FoldLeastSignificantWord(IR::Inst& inst) {
     inst.ReplaceUsesWith(IR::Value{static_cast<u32>(operand.GetImmediateAsU64())});
 }
 
-void FoldMostSignificantBit(IR::Inst& inst) {
+static void FoldMostSignificantBit(IR::Inst& inst) {
     if (!inst.AreAllArgsImmediates()) {
         return;
     }
@@ -238,7 +238,7 @@ void FoldMostSignificantBit(IR::Inst& inst) {
     inst.ReplaceUsesWith(IR::Value{(operand.GetImmediateAsU64() >> 31) != 0});
 }
 
-void FoldMostSignificantWord(IR::Inst& inst) {
+static void FoldMostSignificantWord(IR::Inst& inst) {
     IR::Inst* carry_inst = inst.GetAssociatedPseudoOperation(Op::GetCarryFromOp);
 
     if (!inst.AreAllArgsImmediates()) {
@@ -260,7 +260,7 @@ void FoldMostSignificantWord(IR::Inst& inst) {
 // 4. x * 1 -> x
 // 5. 1 * y -> y
 //
-void FoldMultiply(IR::Inst& inst, bool is_32_bit) {
+static void FoldMultiply(IR::Inst& inst, bool is_32_bit) {
     if (FoldCommutative(inst, is_32_bit, [](u64 a, u64 b) { return a * b; })) {
         const auto rhs = inst.GetArg(1);
         if (rhs.IsZero()) {
@@ -272,7 +272,7 @@ void FoldMultiply(IR::Inst& inst, bool is_32_bit) {
 }
 
 // Folds NOT operations if the contained value is an immediate.
-void FoldNOT(IR::Inst& inst, bool is_32_bit) {
+static void FoldNOT(IR::Inst& inst, bool is_32_bit) {
     const auto operand = inst.GetArg(0);
 
     if (!operand.IsImmediate()) {
@@ -289,7 +289,7 @@ void FoldNOT(IR::Inst& inst, bool is_32_bit) {
 // 2. x | 0 -> x
 // 3. 0 | y -> y
 //
-void FoldOR(IR::Inst& inst, bool is_32_bit) {
+static void FoldOR(IR::Inst& inst, bool is_32_bit) {
     if (FoldCommutative(inst, is_32_bit, [](u64 a, u64 b) { return a | b; })) {
         const auto rhs = inst.GetArg(1);
         if (rhs.IsZero()) {
@@ -298,7 +298,7 @@ void FoldOR(IR::Inst& inst, bool is_32_bit) {
     }
 }
 
-bool FoldShifts(IR::Inst& inst) {
+static bool FoldShifts(IR::Inst& inst) {
     IR::Inst* carry_inst = inst.GetAssociatedPseudoOperation(Op::GetCarryFromOp);
 
     // The 32-bit variants can contain 3 arguments, while the
@@ -328,7 +328,7 @@ bool FoldShifts(IR::Inst& inst) {
     return true;
 }
 
-void FoldSignExtendXToWord(IR::Inst& inst) {
+static void FoldSignExtendXToWord(IR::Inst& inst) {
     if (!inst.AreAllArgsImmediates()) {
         return;
     }
@@ -337,7 +337,7 @@ void FoldSignExtendXToWord(IR::Inst& inst) {
     inst.ReplaceUsesWith(IR::Value{static_cast<u32>(value)});
 }
 
-void FoldSignExtendXToLong(IR::Inst& inst) {
+static void FoldSignExtendXToLong(IR::Inst& inst) {
     if (!inst.AreAllArgsImmediates()) {
         return;
     }
@@ -346,7 +346,7 @@ void FoldSignExtendXToLong(IR::Inst& inst) {
     inst.ReplaceUsesWith(IR::Value{static_cast<u64>(value)});
 }
 
-void FoldSub(IR::Inst& inst, bool is_32_bit) {
+static void FoldSub(IR::Inst& inst, bool is_32_bit) {
     if (!inst.AreAllArgsImmediates() || inst.HasAssociatedPseudoOperation()) {
         return;
     }
@@ -359,7 +359,7 @@ void FoldSub(IR::Inst& inst, bool is_32_bit) {
 
     ReplaceUsesWith(inst, is_32_bit, result);
 }
 
-void FoldZeroExtendXToWord(IR::Inst& inst) {
+static void FoldZeroExtendXToWord(IR::Inst& inst) {
     if (!inst.AreAllArgsImmediates()) {
         return;
     }
@@ -368,7 +368,7 @@ void FoldZeroExtendXToWord(IR::Inst& inst) {
     inst.ReplaceUsesWith(IR::Value{static_cast<u32>(value)});
 }
 
-void FoldZeroExtendXToLong(IR::Inst& inst) {
+static void FoldZeroExtendXToLong(IR::Inst& inst) {
     if (!inst.AreAllArgsImmediates()) {
         return;
     }
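Every helper in this file gains `static`, giving it internal linkage: the symbol stays private to the translation unit, cannot collide with a like-named function in another object file, and is easier for the compiler to inline or discard if unused. In miniature:

```cpp
// Internal linkage: fold_add is private to this translation unit, so a
// different .cpp may define its own fold_add without an ODR clash.
static int fold_add(int a, int b) {
    return a + b;
}

int main() {
    return fold_add(2, 3) == 5 ? 0 : 1;
}
```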

src/dynarmic/ir/opt/identity_removal_pass.cpp

@@ -13,11 +13,8 @@ namespace Dynarmic::Optimization {
 void IdentityRemovalPass(IR::Block& block) {
     std::vector<IR::Inst*> to_invalidate;
 
-    auto iter = block.begin();
-    while (iter != block.end()) {
+    for (auto iter = block.begin(); iter != block.end(); ) {
         IR::Inst& inst = *iter;
-
         const size_t num_args = inst.NumArgs();
         for (size_t i = 0; i < num_args; i++) {
             while (true) {
@@ -27,18 +24,15 @@ void IdentityRemovalPass(IR::Block& block) {
                 inst.SetArg(i, arg.GetInst()->GetArg(0));
             }
         }
-
         if (inst.GetOpcode() == IR::Opcode::Identity || inst.GetOpcode() == IR::Opcode::Void) {
-            iter = block.Instructions().erase(inst);
+            iter = block.Instructions().erase(iter);
             to_invalidate.push_back(&inst);
         } else {
             ++iter;
         }
     }
-
-    for (IR::Inst* inst : to_invalidate) {
+    for (auto* inst : to_invalidate)
         inst->Invalidate();
-    }
 }
 
 } // namespace Dynarmic::Optimization
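The rewritten loop is the canonical erase-while-iterating idiom: `erase()` already returns the iterator to the next element, so the loop advances manually only when nothing was erased. In miniature:

```cpp
#include <list>

int main() {
    std::list<int> xs = {1, 2, 3, 4, 5};

    for (auto it = xs.begin(); it != xs.end();) {
        if (*it % 2 == 0) {
            it = xs.erase(it);  // remove current element, step to its successor
        } else {
            ++it;               // keep current element, advance manually
        }
    }
    return xs.size() == 3 ? 0 : 1;
}
```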

src/dynarmic/ir/opt/polyfill_pass.cpp

@@ -13,7 +13,7 @@ namespace Dynarmic::Optimization {
 
 namespace {
 
-void PolyfillSHA256MessageSchedule0(IR::IREmitter& ir, IR::Inst& inst) {
+static void PolyfillSHA256MessageSchedule0(IR::IREmitter& ir, IR::Inst& inst) {
     const IR::U128 x = (IR::U128)inst.GetArg(0);
     const IR::U128 y = (IR::U128)inst.GetArg(1);
 
@@ -37,13 +37,14 @@ void PolyfillSHA256MessageSchedule0(IR::IREmitter& ir, IR::Inst& inst) {
     inst.ReplaceUsesWith(result);
 }
 
-void PolyfillSHA256MessageSchedule1(IR::IREmitter& ir, IR::Inst& inst) {
+static void PolyfillSHA256MessageSchedule1(IR::IREmitter& ir, IR::Inst& inst) {
     const IR::U128 x = (IR::U128)inst.GetArg(0);
     const IR::U128 y = (IR::U128)inst.GetArg(1);
     const IR::U128 z = (IR::U128)inst.GetArg(2);
 
     const IR::U128 T0 = ir.VectorExtract(y, z, 32);
 
+    // TODO: this can use better pipelining m8
     const IR::U128 lower_half = [&] {
         const IR::U128 T = ir.VectorRotateWholeVectorRight(z, 64);
         const IR::U128 tmp1 = ir.VectorRotateRight(32, T, 17);
@@ -73,15 +74,15 @@ void PolyfillSHA256MessageSchedule1(IR::IREmitter& ir, IR::Inst& inst) {
     inst.ReplaceUsesWith(result);
 }
 
-IR::U32 SHAchoose(IR::IREmitter& ir, IR::U32 x, IR::U32 y, IR::U32 z) {
+static IR::U32 SHAchoose(IR::IREmitter& ir, IR::U32 x, IR::U32 y, IR::U32 z) {
     return ir.Eor(ir.And(ir.Eor(y, z), x), z);
 }
 
-IR::U32 SHAmajority(IR::IREmitter& ir, IR::U32 x, IR::U32 y, IR::U32 z) {
+static IR::U32 SHAmajority(IR::IREmitter& ir, IR::U32 x, IR::U32 y, IR::U32 z) {
     return ir.Or(ir.And(x, y), ir.And(ir.Or(x, y), z));
 }
 
-IR::U32 SHAhashSIGMA0(IR::IREmitter& ir, IR::U32 x) {
+static IR::U32 SHAhashSIGMA0(IR::IREmitter& ir, IR::U32 x) {
     const IR::U32 tmp1 = ir.RotateRight(x, ir.Imm8(2));
     const IR::U32 tmp2 = ir.RotateRight(x, ir.Imm8(13));
     const IR::U32 tmp3 = ir.RotateRight(x, ir.Imm8(22));
@@ -89,7 +90,7 @@ IR::U32 SHAhashSIGMA0(IR::IREmitter& ir, IR::U32 x) {
     return ir.Eor(tmp1, ir.Eor(tmp2, tmp3));
 }
 
-IR::U32 SHAhashSIGMA1(IR::IREmitter& ir, IR::U32 x) {
+static IR::U32 SHAhashSIGMA1(IR::IREmitter& ir, IR::U32 x) {
     const IR::U32 tmp1 = ir.RotateRight(x, ir.Imm8(6));
     const IR::U32 tmp2 = ir.RotateRight(x, ir.Imm8(11));
     const IR::U32 tmp3 = ir.RotateRight(x, ir.Imm8(25));
@@ -97,7 +98,7 @@ IR::U32 SHAhashSIGMA1(IR::IREmitter& ir, IR::U32 x) {
     return ir.Eor(tmp1, ir.Eor(tmp2, tmp3));
 }
 
-void PolyfillSHA256Hash(IR::IREmitter& ir, IR::Inst& inst) {
+static void PolyfillSHA256Hash(IR::IREmitter& ir, IR::Inst& inst) {
     IR::U128 x = (IR::U128)inst.GetArg(0);
     IR::U128 y = (IR::U128)inst.GetArg(1);
     const IR::U128 w = (IR::U128)inst.GetArg(2);
@@ -139,7 +140,7 @@ void PolyfillSHA256Hash(IR::IREmitter& ir, IR::Inst& inst) {
 }
 
 template<size_t esize, bool is_signed>
-void PolyfillVectorMultiplyWiden(IR::IREmitter& ir, IR::Inst& inst) {
+static void PolyfillVectorMultiplyWiden(IR::IREmitter& ir, IR::Inst& inst) {
     IR::U128 n = (IR::U128)inst.GetArg(0);
     IR::U128 m = (IR::U128)inst.GetArg(1);
 
@@ -159,54 +160,52 @@ void PolyfillPass(IR::Block& block, const PolyfillOptions& polyfill) {
     }
 
     IR::IREmitter ir{block};
 
-    for (auto& inst : block) {
-        ir.SetInsertionPointBefore(&inst);
-
-        switch (inst.GetOpcode()) {
+    for (auto iter = block.begin(); iter != block.end(); iter++) {
+        ir.SetInsertionPointBefore(iter);
+        switch (iter->GetOpcode()) {
         case IR::Opcode::SHA256MessageSchedule0:
             if (polyfill.sha256) {
-                PolyfillSHA256MessageSchedule0(ir, inst);
+                PolyfillSHA256MessageSchedule0(ir, *iter);
             }
             break;
         case IR::Opcode::SHA256MessageSchedule1:
             if (polyfill.sha256) {
-                PolyfillSHA256MessageSchedule1(ir, inst);
+                PolyfillSHA256MessageSchedule1(ir, *iter);
             }
             break;
         case IR::Opcode::SHA256Hash:
             if (polyfill.sha256) {
-                PolyfillSHA256Hash(ir, inst);
+                PolyfillSHA256Hash(ir, *iter);
            }
            break;
        case IR::Opcode::VectorMultiplySignedWiden8:
            if (polyfill.vector_multiply_widen) {
-                PolyfillVectorMultiplyWiden<8, true>(ir, inst);
+                PolyfillVectorMultiplyWiden<8, true>(ir, *iter);
            }
            break;
        case IR::Opcode::VectorMultiplySignedWiden16:
            if (polyfill.vector_multiply_widen) {
-                PolyfillVectorMultiplyWiden<16, true>(ir, inst);
+                PolyfillVectorMultiplyWiden<16, true>(ir, *iter);
            }
            break;
        case IR::Opcode::VectorMultiplySignedWiden32:
            if (polyfill.vector_multiply_widen) {
-                PolyfillVectorMultiplyWiden<32, true>(ir, inst);
+                PolyfillVectorMultiplyWiden<32, true>(ir, *iter);
            }
            break;
        case IR::Opcode::VectorMultiplyUnsignedWiden8:
            if (polyfill.vector_multiply_widen) {
-                PolyfillVectorMultiplyWiden<8, false>(ir, inst);
+                PolyfillVectorMultiplyWiden<8, false>(ir, *iter);
            }
            break;
        case IR::Opcode::VectorMultiplyUnsignedWiden16:
            if (polyfill.vector_multiply_widen) {
-                PolyfillVectorMultiplyWiden<16, false>(ir, inst);
+                PolyfillVectorMultiplyWiden<16, false>(ir, *iter);
            }
            break;
        case IR::Opcode::VectorMultiplyUnsignedWiden32:
            if (polyfill.vector_multiply_widen) {
-                PolyfillVectorMultiplyWiden<32, false>(ir, inst);
+                PolyfillVectorMultiplyWiden<32, false>(ir, *iter);
            }
            break;
        default: