[dynarmic] remove memory pool (standard malloc does a better job)
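
In practice this means IR instructions are no longer placement-constructed into slabs owned by Common::Pool: Block::PrependNewInst now calls plain operator new and leans on the system allocator's own handling of small allocations. A minimal standalone sketch of the before/after (illustrative only; Inst here is a hypothetical stand-in, not dynarmic's IR::Inst):

    #include <cstdlib>  // std::malloc, std::free
    #include <new>      // placement new

    struct Inst {
        explicit Inst(int op) : opcode(op) {}
        int opcode;
    };

    int main() {
        // Before: memory came from a slab pool; the instruction was
        // constructed in place and never freed individually (whole slabs
        // were released at once when the pool was destroyed).
        void* slot = std::malloc(sizeof(Inst));  // stand-in for pool->Alloc()
        Inst* pooled = new (slot) Inst(1);
        pooled->~Inst();
        std::free(slot);

        // After: each instruction is owned by the default allocator directly.
        Inst* plain = new Inst(2);
        delete plain;
        return 0;
    }

(The removed pool freed whole slabs without running destructors, so instructions were never deleted one by one; with plain new, each instruction is an ordinary heap object.)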

lizzie 2025-07-19 00:51:47 +01:00
parent 3164b05670
commit 2710ea7d8a
16 changed files with 85 additions and 264 deletions

View file

@ -53,8 +53,6 @@ add_library(dynarmic
common/lut_from_list.h
common/math_util.cpp
common/math_util.h
common/memory_pool.cpp
common/memory_pool.h
common/safe_ops.h
common/spin_lock.h
common/string_util.h
@ -153,6 +151,7 @@ if ("A64" IN_LIST DYNARMIC_FRONTENDS)
ir/opt/a64_callback_config_pass.cpp
ir/opt/a64_get_set_elimination_pass.cpp
ir/opt/a64_merge_interpret_blocks.cpp
ir/opt/x64_peepholes.cpp
)
endif()

View file

@ -35,11 +35,6 @@ EmitContext::EmitContext(RegAlloc& reg_alloc, IR::Block& block)
EmitContext::~EmitContext() = default;
void EmitContext::EraseInstruction(IR::Inst* inst) {
block.Instructions().erase(inst);
inst->ClearArgs();
}
EmitX64::EmitX64(BlockOfCode& code)
: code(code) {
exception_handler.Register(code);

View file

@ -54,10 +54,7 @@ struct EmitContext {
EmitContext(RegAlloc& reg_alloc, IR::Block& block);
virtual ~EmitContext();
void EraseInstruction(IR::Inst* inst);
virtual FP::FPCR FPCR(bool fpcr_controlled = true) const = 0;
virtual bool HasOptimization(OptimizationFlag flag) const = 0;
RegAlloc& reg_alloc;

View file

@ -40,7 +40,6 @@ static void EmitCRC32Castagnoli(BlockOfCode& code, EmitContext& ctx, IR::Inst* i
static void EmitCRC32ISO(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, const int data_size) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
if (code.HasHostFeature(HostFeature::PCLMULQDQ) && data_size < 32) {
const Xbyak::Reg32 crc = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
const Xbyak::Reg64 value = ctx.reg_alloc.UseScratchGpr(args[1]);
@ -69,10 +68,7 @@ static void EmitCRC32ISO(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, co
code.pextrd(crc, xmm_value, 2);
ctx.reg_alloc.DefineValue(inst, crc);
return;
}
if (code.HasHostFeature(HostFeature::PCLMULQDQ) && data_size == 32) {
} else if (code.HasHostFeature(HostFeature::PCLMULQDQ) && data_size == 32) {
const Xbyak::Reg32 crc = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
const Xbyak::Reg32 value = ctx.reg_alloc.UseGpr(args[1]).cvt32();
const Xbyak::Xmm xmm_value = ctx.reg_alloc.ScratchXmm();
@ -90,10 +86,7 @@ static void EmitCRC32ISO(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, co
code.pextrd(crc, xmm_value, 2);
ctx.reg_alloc.DefineValue(inst, crc);
return;
}
if (code.HasHostFeature(HostFeature::PCLMULQDQ) && data_size == 64) {
} else if (code.HasHostFeature(HostFeature::PCLMULQDQ) && data_size == 64) {
const Xbyak::Reg32 crc = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
const Xbyak::Reg64 value = ctx.reg_alloc.UseGpr(args[1]);
const Xbyak::Xmm xmm_value = ctx.reg_alloc.ScratchXmm();
@ -111,12 +104,11 @@ static void EmitCRC32ISO(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, co
code.pextrd(crc, xmm_value, 2);
ctx.reg_alloc.DefineValue(inst, crc);
return;
} else {
ctx.reg_alloc.HostCall(inst, args[0], args[1], {});
code.mov(code.ABI_PARAM3, data_size / CHAR_BIT);
code.CallFunction(&CRC32::ComputeCRC32ISO);
}
ctx.reg_alloc.HostCall(inst, args[0], args[1], {});
code.mov(code.ABI_PARAM3, data_size / CHAR_BIT);
code.CallFunction(&CRC32::ComputeCRC32ISO);
}
void EmitX64::EmitCRC32Castagnoli8(EmitContext& ctx, IR::Inst* inst) {

View file

@ -236,23 +236,19 @@ void DenormalsAreZero(BlockOfCode& code, FP::FPCR fpcr, std::initializer_list<Xb
FpFixup::Norm_Src,
FpFixup::Norm_Src,
FpFixup::Norm_Src,
FpFixup::Norm_Src);
FpFixup::Norm_Src
);
FCODE(vmovap)(tmp, code.BConst<fsize>(xword, denormal_to_zero));
for (const Xbyak::Xmm& xmm : to_daz) {
for (const Xbyak::Xmm& xmm : to_daz)
FCODE(vfixupimmp)(xmm, xmm, tmp, u8(0));
}
return;
}
if (fpcr.RMode() != FP::RoundingMode::TowardsMinusInfinity) {
code.movaps(tmp, GetNegativeZeroVector<fsize>(code));
} else {
code.xorps(tmp, tmp);
}
for (const Xbyak::Xmm& xmm : to_daz) {
FCODE(addp)(xmm, tmp);
if (fpcr.RMode() != FP::RoundingMode::TowardsMinusInfinity) {
code.movaps(tmp, GetNegativeZeroVector<fsize>(code));
} else {
code.xorps(tmp, tmp);
}
for (const Xbyak::Xmm& xmm : to_daz)
FCODE(addp)(xmm, tmp);
}
}
}

View file

@ -1,13 +0,0 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#include "dynarmic/common/memory_pool.h"
#include <cstdlib>
namespace Dynarmic::Common {
} // namespace Dynarmic::Common

View file

@ -1,61 +0,0 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#pragma once
#include <cstddef>
#include <vector>
namespace Dynarmic::Common {
/// @tparam object_size Byte-size of objects to construct
/// @tparam slab_size Number of objects to have per slab
template<size_t object_size, size_t slab_size>
class Pool {
public:
inline Pool() noexcept {
AllocateNewSlab();
}
inline ~Pool() noexcept {
std::free(current_slab);
for (char* slab : slabs) {
std::free(slab);
}
}
Pool(const Pool&) = delete;
Pool(Pool&&) = delete;
Pool& operator=(const Pool&) = delete;
Pool& operator=(Pool&&) = delete;
/// @brief Returns a pointer to an `object_size`-bytes block of memory.
[[nodiscard]] void* Alloc() noexcept {
if (remaining == 0) {
slabs.push_back(current_slab);
AllocateNewSlab();
}
void* ret = static_cast<void*>(current_ptr);
current_ptr += object_size;
remaining--;
return ret;
}
private:
/// @brief Allocates a completely new memory slab.
/// Used when an entirely new slab is needed
/// due the current one running out of usable space.
void AllocateNewSlab() noexcept {
current_slab = static_cast<char*>(std::malloc(object_size * slab_size));
current_ptr = current_slab;
remaining = slab_size;
}
std::vector<char*> slabs;
char* current_slab = nullptr;
char* current_ptr = nullptr;
size_t remaining = 0;
};
} // namespace Dynarmic::Common

View file

@ -13,7 +13,6 @@
#include <fmt/format.h>
#include <mcl/assert.hpp>
#include "dynarmic/common/memory_pool.h"
#include "dynarmic/frontend/A32/a32_types.h"
#include "dynarmic/frontend/A64/a64_types.h"
#include "dynarmic/ir/cond.h"
@ -24,8 +23,7 @@ namespace Dynarmic::IR {
Block::Block(const LocationDescriptor& location)
: location{location},
end_location{location},
cond{Cond::AL},
instruction_alloc_pool{std::make_unique<std::remove_reference_t<decltype(*instruction_alloc_pool)>>()}
cond{Cond::AL}
{
}
@ -37,7 +35,7 @@ Block::Block(const LocationDescriptor& location)
/// @param args A sequence of Value instances used as arguments for the instruction.
/// @returns Iterator to the newly created instruction.
Block::iterator Block::PrependNewInst(iterator insertion_point, Opcode opcode, std::initializer_list<Value> args) noexcept {
IR::Inst* inst = new (instruction_alloc_pool->Alloc()) IR::Inst(opcode);
IR::Inst* inst = new IR::Inst(opcode);
DEBUG_ASSERT(args.size() == inst->NumArgs());
std::for_each(args.begin(), args.end(), [&inst, index = size_t(0)](const auto& arg) mutable {
inst->SetArg(index, arg);
@ -83,9 +81,7 @@ static std::string TerminalToString(const Terminal& terminal_variant) noexcept {
}
std::string DumpBlock(const IR::Block& block) noexcept {
std::string ret;
ret += fmt::format("Block: location={}\n", block.Location());
std::string ret = fmt::format("Block: location={}\n", block.Location());
ret += fmt::format("cycles={}", block.CycleCount());
ret += fmt::format(", entry_cond={}", A64::CondToString(block.GetCondition()));
if (block.GetCondition() != Cond::AL) {
@ -113,6 +109,8 @@ std::string DumpBlock(const IR::Block& block) noexcept {
return fmt::format("#{:#x}", arg.GetU32());
case Type::U64:
return fmt::format("#{:#x}", arg.GetU64());
case Type::U128:
return fmt::format("#<u128>");
case Type::A32Reg:
return A32::RegToString(arg.GetA32RegRef());
case Type::A32ExtReg:
@ -155,14 +153,9 @@ std::string DumpBlock(const IR::Block& block) noexcept {
ret += fmt::format("<type error: {} != {}>", GetNameOf(actual_type), GetNameOf(expected_type));
}
}
ret += fmt::format(" (uses: {})", inst.UseCount());
ret += '\n';
ret += fmt::format(" (uses: {})\n", inst.UseCount());
}
ret += "terminal = " + TerminalToString(block.GetTerminal()) + '\n';
return ret;
}

View file

@ -17,8 +17,6 @@
#include "dynarmic/ir/microinstruction.h"
#include "dynarmic/ir/terminal.h"
#include "dynarmic/ir/value.h"
#include "dynarmic/ir/dense_list.h"
#include "dynarmic/common/memory_pool.h"
namespace Dynarmic::IR {
@ -76,7 +74,7 @@ public:
/// @param op Opcode representing the instruction to add.
/// @param args A sequence of Value instances used as arguments for the instruction.
inline void AppendNewInst(const Opcode opcode, const std::initializer_list<IR::Value> args) noexcept {
PrependNewInst(end(), opcode, args);
PrependNewInst(instructions.end(), opcode, args);
}
iterator PrependNewInst(iterator insertion_point, Opcode op, std::initializer_list<Value> args) noexcept;
@ -171,8 +169,6 @@ private:
LocationDescriptor end_location;
/// Conditional to pass in order to execute this block
Cond cond;
/// Memory pool for instruction list
std::unique_ptr<Common::Pool<sizeof(Inst), 2097152UL / sizeof(Inst)>> instruction_alloc_pool;
/// Terminal instruction of this block.
Terminal terminal = Term::Invalid{};
/// Number of cycles this block takes to execute if the conditional fails.

View file

@ -1,58 +0,0 @@
#pragma once
#include <cstdint>
#include <cstddef>
#include <deque>
namespace Dynarmic {
template<typename T> struct dense_list {
using difference_type = std::ptrdiff_t;
using size_type = std::size_t;
using value_type = T;
using pointer = value_type*;
using const_pointer = const value_type*;
using reference = value_type&;
using const_reference = const value_type&;
using iterator = std::deque<value_type>::iterator;
using const_iterator = std::deque<value_type>::const_iterator;
using reverse_iterator = std::reverse_iterator<iterator>;
using const_reverse_iterator = std::reverse_iterator<const_iterator>;
inline bool empty() const noexcept { return list.empty(); }
inline size_type size() const noexcept { return list.size(); }
inline value_type& front() noexcept { return list.front(); }
inline const value_type& front() const noexcept { return list.front(); }
inline value_type& back() noexcept { return list.back(); }
inline const value_type& back() const noexcept { return list.back(); }
inline iterator begin() noexcept { return list.begin(); }
inline const_iterator begin() const noexcept { return list.begin(); }
inline iterator end() noexcept { return list.end(); }
inline const_iterator end() const noexcept { return list.end(); }
inline reverse_iterator rbegin() noexcept { return list.rbegin(); }
inline const_reverse_iterator rbegin() const noexcept { return list.rbegin(); }
inline reverse_iterator rend() noexcept { return list.rend(); }
inline const_reverse_iterator rend() const noexcept { return list.rend(); }
inline const_iterator cbegin() const noexcept { return list.cbegin(); }
inline const_iterator cend() const noexcept { return list.cend(); }
inline const_reverse_iterator crbegin() const noexcept { return list.crbegin(); }
inline const_reverse_iterator crend() const noexcept { return list.crend(); }
inline iterator insert_before(iterator it, value_type& value) noexcept {
if (it == list.begin()) {
list.push_front(value);
return list.begin();
}
auto const index = std::distance(list.begin(), it - 1);
list.insert(it - 1, value);
return list.begin() + index;
}
std::deque<value_type> list;
};
}

View file

@ -2947,19 +2947,10 @@ public:
block.SetTerminal(terminal);
}
void SetInsertionPointBefore(IR::Inst* new_insertion_point) {
insertion_point = IR::Block::iterator{*new_insertion_point};
}
void SetInsertionPointBefore(IR::Block::iterator new_insertion_point) {
insertion_point = new_insertion_point;
}
void SetInsertionPointAfter(IR::Inst* new_insertion_point) {
insertion_point = IR::Block::iterator{*new_insertion_point};
++insertion_point;
}
void SetInsertionPointAfter(IR::Block::iterator new_insertion_point) {
insertion_point = new_insertion_point;
++insertion_point;

View file

@ -22,8 +22,7 @@ namespace Dynarmic::Optimization {
namespace {
void FlagsPass(IR::Block& block) {
using Iterator = std::reverse_iterator<IR::Block::iterator>;
using Iterator = IR::Block::reverse_iterator;
struct FlagInfo {
bool set_not_required = false;
bool has_value_request = false;
@ -185,10 +184,10 @@ void RegisterPass(IR::Block& block) {
using Iterator = IR::Block::iterator;
struct RegInfo {
IR::Value register_value;
std::optional<Iterator> last_set_instruction;
IR::Value register_value;
};
std::array<RegInfo, 15> reg_info;
alignas(64) std::array<RegInfo, 15> reg_info;
const auto do_get = [](RegInfo& info, Iterator get_inst) {
if (info.register_value.IsEmpty()) {
@ -203,12 +202,12 @@ void RegisterPass(IR::Block& block) {
(*info.last_set_instruction)->Invalidate();
}
info = {
.register_value = value,
.last_set_instruction = set_inst,
.register_value = value,
};
};
enum class ExtValueType {
enum class ExtValueType : std::uint8_t {
Empty,
Single,
Double,
@ -216,19 +215,20 @@ void RegisterPass(IR::Block& block) {
VectorQuad,
};
struct ExtRegInfo {
ExtValueType value_type = {};
IR::Value register_value;
std::optional<Iterator> last_set_instruction;
ExtValueType value_type = {};
};
std::array<ExtRegInfo, 64> ext_reg_info;
// Max returned by RegNumber = 31 (but multiplied by 4 in some cases)
alignas(64) std::array<ExtRegInfo, 128> ext_reg_info;
const auto do_ext_get = [](ExtValueType type, std::initializer_list<std::reference_wrapper<ExtRegInfo>> infos, Iterator get_inst) {
if (!std::all_of(infos.begin(), infos.end(), [type](const auto& info) { return info.get().value_type == type; })) {
for (auto& info : infos) {
info.get() = {
.value_type = type,
.register_value = IR::Value(&*get_inst),
.last_set_instruction = std::nullopt,
.value_type = type,
};
}
return;
@ -244,9 +244,9 @@ void RegisterPass(IR::Block& block) {
}
for (auto& info : infos) {
info.get() = {
.value_type = type,
.register_value = value,
.last_set_instruction = set_inst,
.value_type = type,
};
}
};

View file

@ -17,7 +17,8 @@ void A64CallbackConfigPass(IR::Block& block, const A64::UserConfig& conf) {
return;
}
for (auto& inst : block) {
for (auto iter = block.begin(); iter != block.end(); iter++) {
auto& inst = *iter;
if (inst.GetOpcode() != IR::Opcode::A64DataCacheOperationRaised) {
continue;
}
@ -26,7 +27,7 @@ void A64CallbackConfigPass(IR::Block& block, const A64::UserConfig& conf) {
if (op == A64::DataCacheOperation::ZeroByVA) {
A64::IREmitter ir{block};
ir.current_location = A64::LocationDescriptor{IR::LocationDescriptor{inst.GetArg(0).GetU64()}};
ir.SetInsertionPointBefore(&inst);
ir.SetInsertionPointBefore(iter);
size_t bytes = 4 << static_cast<size_t>(conf.dczid_el0 & 0b1111);
IR::U64 addr{inst.GetArg(2)};

View file

@ -22,9 +22,9 @@ using Op = Dynarmic::IR::Opcode;
namespace {
// Tiny helper to avoid the need to store based off the opcode
// bit size all over the place within folding functions.
void ReplaceUsesWith(IR::Inst& inst, bool is_32_bit, u64 value) {
/// Tiny helper to avoid the need to store based off the opcode
/// bit size all over the place within folding functions.
static void ReplaceUsesWith(IR::Inst& inst, bool is_32_bit, u64 value) {
if (is_32_bit) {
inst.ReplaceUsesWith(IR::Value{static_cast<u32>(value)});
} else {
@ -32,12 +32,12 @@ void ReplaceUsesWith(IR::Inst& inst, bool is_32_bit, u64 value) {
}
}
IR::Value Value(bool is_32_bit, u64 value) {
static IR::Value Value(bool is_32_bit, u64 value) {
return is_32_bit ? IR::Value{static_cast<u32>(value)} : IR::Value{value};
}
template<typename ImmFn>
bool FoldCommutative(IR::Inst& inst, bool is_32_bit, ImmFn imm_fn) {
static bool FoldCommutative(IR::Inst& inst, bool is_32_bit, ImmFn imm_fn) {
const auto lhs = inst.GetArg(0);
const auto rhs = inst.GetArg(1);
@ -75,7 +75,7 @@ bool FoldCommutative(IR::Inst& inst, bool is_32_bit, ImmFn imm_fn) {
return true;
}
void FoldAdd(IR::Inst& inst, bool is_32_bit) {
static void FoldAdd(IR::Inst& inst, bool is_32_bit) {
const auto lhs = inst.GetArg(0);
const auto rhs = inst.GetArg(1);
const auto carry = inst.GetArg(2);
@ -125,7 +125,7 @@ void FoldAdd(IR::Inst& inst, bool is_32_bit) {
/// 4. x & y -> y (where x has all bits set to 1)
/// 5. x & y -> x (where y has all bits set to 1)
///
void FoldAND(IR::Inst& inst, bool is_32_bit) {
static void FoldAND(IR::Inst& inst, bool is_32_bit) {
if (FoldCommutative(inst, is_32_bit, [](u64 a, u64 b) { return a & b; })) {
const auto rhs = inst.GetArg(1);
if (rhs.IsZero()) {
@ -140,7 +140,7 @@ void FoldAND(IR::Inst& inst, bool is_32_bit) {
///
/// 1. imm -> swap(imm)
///
void FoldByteReverse(IR::Inst& inst, Op op) {
static void FoldByteReverse(IR::Inst& inst, Op op) {
const auto operand = inst.GetArg(0);
if (!operand.IsImmediate()) {
@ -165,7 +165,7 @@ void FoldByteReverse(IR::Inst& inst, Op op) {
/// 2. imm_x / imm_y -> result
/// 3. x / 1 -> x
///
void FoldDivide(IR::Inst& inst, bool is_32_bit, bool is_signed) {
static void FoldDivide(IR::Inst& inst, bool is_32_bit, bool is_signed) {
const auto rhs = inst.GetArg(1);
if (rhs.IsZero()) {
@ -193,7 +193,7 @@ void FoldDivide(IR::Inst& inst, bool is_32_bit, bool is_signed) {
// 2. x ^ 0 -> x
// 3. 0 ^ y -> y
//
void FoldEOR(IR::Inst& inst, bool is_32_bit) {
static void FoldEOR(IR::Inst& inst, bool is_32_bit) {
if (FoldCommutative(inst, is_32_bit, [](u64 a, u64 b) { return a ^ b; })) {
const auto rhs = inst.GetArg(1);
if (rhs.IsZero()) {
@ -202,7 +202,7 @@ void FoldEOR(IR::Inst& inst, bool is_32_bit) {
}
}
void FoldLeastSignificantByte(IR::Inst& inst) {
static void FoldLeastSignificantByte(IR::Inst& inst) {
if (!inst.AreAllArgsImmediates()) {
return;
}
@ -211,7 +211,7 @@ void FoldLeastSignificantByte(IR::Inst& inst) {
inst.ReplaceUsesWith(IR::Value{static_cast<u8>(operand.GetImmediateAsU64())});
}
void FoldLeastSignificantHalf(IR::Inst& inst) {
static void FoldLeastSignificantHalf(IR::Inst& inst) {
if (!inst.AreAllArgsImmediates()) {
return;
}
@ -220,7 +220,7 @@ void FoldLeastSignificantHalf(IR::Inst& inst) {
inst.ReplaceUsesWith(IR::Value{static_cast<u16>(operand.GetImmediateAsU64())});
}
void FoldLeastSignificantWord(IR::Inst& inst) {
static void FoldLeastSignificantWord(IR::Inst& inst) {
if (!inst.AreAllArgsImmediates()) {
return;
}
@ -229,7 +229,7 @@ void FoldLeastSignificantWord(IR::Inst& inst) {
inst.ReplaceUsesWith(IR::Value{static_cast<u32>(operand.GetImmediateAsU64())});
}
void FoldMostSignificantBit(IR::Inst& inst) {
static void FoldMostSignificantBit(IR::Inst& inst) {
if (!inst.AreAllArgsImmediates()) {
return;
}
@ -238,7 +238,7 @@ void FoldMostSignificantBit(IR::Inst& inst) {
inst.ReplaceUsesWith(IR::Value{(operand.GetImmediateAsU64() >> 31) != 0});
}
void FoldMostSignificantWord(IR::Inst& inst) {
static void FoldMostSignificantWord(IR::Inst& inst) {
IR::Inst* carry_inst = inst.GetAssociatedPseudoOperation(Op::GetCarryFromOp);
if (!inst.AreAllArgsImmediates()) {
@ -260,7 +260,7 @@ void FoldMostSignificantWord(IR::Inst& inst) {
// 4. x * 1 -> x
// 5. 1 * y -> y
//
void FoldMultiply(IR::Inst& inst, bool is_32_bit) {
static void FoldMultiply(IR::Inst& inst, bool is_32_bit) {
if (FoldCommutative(inst, is_32_bit, [](u64 a, u64 b) { return a * b; })) {
const auto rhs = inst.GetArg(1);
if (rhs.IsZero()) {
@ -272,7 +272,7 @@ void FoldMultiply(IR::Inst& inst, bool is_32_bit) {
}
// Folds NOT operations if the contained value is an immediate.
void FoldNOT(IR::Inst& inst, bool is_32_bit) {
static void FoldNOT(IR::Inst& inst, bool is_32_bit) {
const auto operand = inst.GetArg(0);
if (!operand.IsImmediate()) {
@ -289,7 +289,7 @@ void FoldNOT(IR::Inst& inst, bool is_32_bit) {
// 2. x | 0 -> x
// 3. 0 | y -> y
//
void FoldOR(IR::Inst& inst, bool is_32_bit) {
static void FoldOR(IR::Inst& inst, bool is_32_bit) {
if (FoldCommutative(inst, is_32_bit, [](u64 a, u64 b) { return a | b; })) {
const auto rhs = inst.GetArg(1);
if (rhs.IsZero()) {
@ -298,7 +298,7 @@ void FoldOR(IR::Inst& inst, bool is_32_bit) {
}
}
bool FoldShifts(IR::Inst& inst) {
static bool FoldShifts(IR::Inst& inst) {
IR::Inst* carry_inst = inst.GetAssociatedPseudoOperation(Op::GetCarryFromOp);
// The 32-bit variants can contain 3 arguments, while the
@ -328,7 +328,7 @@ bool FoldShifts(IR::Inst& inst) {
return true;
}
void FoldSignExtendXToWord(IR::Inst& inst) {
static void FoldSignExtendXToWord(IR::Inst& inst) {
if (!inst.AreAllArgsImmediates()) {
return;
}
@ -337,7 +337,7 @@ void FoldSignExtendXToWord(IR::Inst& inst) {
inst.ReplaceUsesWith(IR::Value{static_cast<u32>(value)});
}
void FoldSignExtendXToLong(IR::Inst& inst) {
static void FoldSignExtendXToLong(IR::Inst& inst) {
if (!inst.AreAllArgsImmediates()) {
return;
}
@ -346,7 +346,7 @@ void FoldSignExtendXToLong(IR::Inst& inst) {
inst.ReplaceUsesWith(IR::Value{static_cast<u64>(value)});
}
void FoldSub(IR::Inst& inst, bool is_32_bit) {
static void FoldSub(IR::Inst& inst, bool is_32_bit) {
if (!inst.AreAllArgsImmediates() || inst.HasAssociatedPseudoOperation()) {
return;
}
@ -359,7 +359,7 @@ void FoldSub(IR::Inst& inst, bool is_32_bit) {
ReplaceUsesWith(inst, is_32_bit, result);
}
void FoldZeroExtendXToWord(IR::Inst& inst) {
static void FoldZeroExtendXToWord(IR::Inst& inst) {
if (!inst.AreAllArgsImmediates()) {
return;
}
@ -368,7 +368,7 @@ void FoldZeroExtendXToWord(IR::Inst& inst) {
inst.ReplaceUsesWith(IR::Value{static_cast<u32>(value)});
}
void FoldZeroExtendXToLong(IR::Inst& inst) {
static void FoldZeroExtendXToLong(IR::Inst& inst) {
if (!inst.AreAllArgsImmediates()) {
return;
}

View file

@ -13,11 +13,8 @@ namespace Dynarmic::Optimization {
void IdentityRemovalPass(IR::Block& block) {
std::vector<IR::Inst*> to_invalidate;
auto iter = block.begin();
while (iter != block.end()) {
for (auto iter = block.begin(); iter != block.end(); ) {
IR::Inst& inst = *iter;
const size_t num_args = inst.NumArgs();
for (size_t i = 0; i < num_args; i++) {
while (true) {
@ -27,18 +24,15 @@ void IdentityRemovalPass(IR::Block& block) {
inst.SetArg(i, arg.GetInst()->GetArg(0));
}
}
if (inst.GetOpcode() == IR::Opcode::Identity || inst.GetOpcode() == IR::Opcode::Void) {
iter = block.Instructions().erase(inst);
iter = block.Instructions().erase(iter);
to_invalidate.push_back(&inst);
} else {
++iter;
}
}
for (IR::Inst* inst : to_invalidate) {
for (auto* inst : to_invalidate)
inst->Invalidate();
}
}
} // namespace Dynarmic::Optimization

View file

@ -13,7 +13,7 @@ namespace Dynarmic::Optimization {
namespace {
void PolyfillSHA256MessageSchedule0(IR::IREmitter& ir, IR::Inst& inst) {
static void PolyfillSHA256MessageSchedule0(IR::IREmitter& ir, IR::Inst& inst) {
const IR::U128 x = (IR::U128)inst.GetArg(0);
const IR::U128 y = (IR::U128)inst.GetArg(1);
@ -37,13 +37,14 @@ void PolyfillSHA256MessageSchedule0(IR::IREmitter& ir, IR::Inst& inst) {
inst.ReplaceUsesWith(result);
}
void PolyfillSHA256MessageSchedule1(IR::IREmitter& ir, IR::Inst& inst) {
static void PolyfillSHA256MessageSchedule1(IR::IREmitter& ir, IR::Inst& inst) {
const IR::U128 x = (IR::U128)inst.GetArg(0);
const IR::U128 y = (IR::U128)inst.GetArg(1);
const IR::U128 z = (IR::U128)inst.GetArg(2);
const IR::U128 T0 = ir.VectorExtract(y, z, 32);
// TODO: this can use better pipelining m8
const IR::U128 lower_half = [&] {
const IR::U128 T = ir.VectorRotateWholeVectorRight(z, 64);
const IR::U128 tmp1 = ir.VectorRotateRight(32, T, 17);
@ -73,15 +74,15 @@ void PolyfillSHA256MessageSchedule1(IR::IREmitter& ir, IR::Inst& inst) {
inst.ReplaceUsesWith(result);
}
IR::U32 SHAchoose(IR::IREmitter& ir, IR::U32 x, IR::U32 y, IR::U32 z) {
static IR::U32 SHAchoose(IR::IREmitter& ir, IR::U32 x, IR::U32 y, IR::U32 z) {
return ir.Eor(ir.And(ir.Eor(y, z), x), z);
}
IR::U32 SHAmajority(IR::IREmitter& ir, IR::U32 x, IR::U32 y, IR::U32 z) {
static IR::U32 SHAmajority(IR::IREmitter& ir, IR::U32 x, IR::U32 y, IR::U32 z) {
return ir.Or(ir.And(x, y), ir.And(ir.Or(x, y), z));
}
IR::U32 SHAhashSIGMA0(IR::IREmitter& ir, IR::U32 x) {
static IR::U32 SHAhashSIGMA0(IR::IREmitter& ir, IR::U32 x) {
const IR::U32 tmp1 = ir.RotateRight(x, ir.Imm8(2));
const IR::U32 tmp2 = ir.RotateRight(x, ir.Imm8(13));
const IR::U32 tmp3 = ir.RotateRight(x, ir.Imm8(22));
@ -89,7 +90,7 @@ IR::U32 SHAhashSIGMA0(IR::IREmitter& ir, IR::U32 x) {
return ir.Eor(tmp1, ir.Eor(tmp2, tmp3));
}
IR::U32 SHAhashSIGMA1(IR::IREmitter& ir, IR::U32 x) {
static IR::U32 SHAhashSIGMA1(IR::IREmitter& ir, IR::U32 x) {
const IR::U32 tmp1 = ir.RotateRight(x, ir.Imm8(6));
const IR::U32 tmp2 = ir.RotateRight(x, ir.Imm8(11));
const IR::U32 tmp3 = ir.RotateRight(x, ir.Imm8(25));
@ -97,7 +98,7 @@ IR::U32 SHAhashSIGMA1(IR::IREmitter& ir, IR::U32 x) {
return ir.Eor(tmp1, ir.Eor(tmp2, tmp3));
}
void PolyfillSHA256Hash(IR::IREmitter& ir, IR::Inst& inst) {
static void PolyfillSHA256Hash(IR::IREmitter& ir, IR::Inst& inst) {
IR::U128 x = (IR::U128)inst.GetArg(0);
IR::U128 y = (IR::U128)inst.GetArg(1);
const IR::U128 w = (IR::U128)inst.GetArg(2);
@ -139,7 +140,7 @@ void PolyfillSHA256Hash(IR::IREmitter& ir, IR::Inst& inst) {
}
template<size_t esize, bool is_signed>
void PolyfillVectorMultiplyWiden(IR::IREmitter& ir, IR::Inst& inst) {
static void PolyfillVectorMultiplyWiden(IR::IREmitter& ir, IR::Inst& inst) {
IR::U128 n = (IR::U128)inst.GetArg(0);
IR::U128 m = (IR::U128)inst.GetArg(1);
@ -159,54 +160,52 @@ void PolyfillPass(IR::Block& block, const PolyfillOptions& polyfill) {
}
IR::IREmitter ir{block};
for (auto& inst : block) {
ir.SetInsertionPointBefore(&inst);
switch (inst.GetOpcode()) {
for (auto iter = block.begin(); iter != block.end(); iter++) {
ir.SetInsertionPointBefore(iter);
switch (iter->GetOpcode()) {
case IR::Opcode::SHA256MessageSchedule0:
if (polyfill.sha256) {
PolyfillSHA256MessageSchedule0(ir, inst);
PolyfillSHA256MessageSchedule0(ir, *iter);
}
break;
case IR::Opcode::SHA256MessageSchedule1:
if (polyfill.sha256) {
PolyfillSHA256MessageSchedule1(ir, inst);
PolyfillSHA256MessageSchedule1(ir, *iter);
}
break;
case IR::Opcode::SHA256Hash:
if (polyfill.sha256) {
PolyfillSHA256Hash(ir, inst);
PolyfillSHA256Hash(ir, *iter);
}
break;
case IR::Opcode::VectorMultiplySignedWiden8:
if (polyfill.vector_multiply_widen) {
PolyfillVectorMultiplyWiden<8, true>(ir, inst);
PolyfillVectorMultiplyWiden<8, true>(ir, *iter);
}
break;
case IR::Opcode::VectorMultiplySignedWiden16:
if (polyfill.vector_multiply_widen) {
PolyfillVectorMultiplyWiden<16, true>(ir, inst);
PolyfillVectorMultiplyWiden<16, true>(ir, *iter);
}
break;
case IR::Opcode::VectorMultiplySignedWiden32:
if (polyfill.vector_multiply_widen) {
PolyfillVectorMultiplyWiden<32, true>(ir, inst);
PolyfillVectorMultiplyWiden<32, true>(ir, *iter);
}
break;
case IR::Opcode::VectorMultiplyUnsignedWiden8:
if (polyfill.vector_multiply_widen) {
PolyfillVectorMultiplyWiden<8, false>(ir, inst);
PolyfillVectorMultiplyWiden<8, false>(ir, *iter);
}
break;
case IR::Opcode::VectorMultiplyUnsignedWiden16:
if (polyfill.vector_multiply_widen) {
PolyfillVectorMultiplyWiden<16, false>(ir, inst);
PolyfillVectorMultiplyWiden<16, false>(ir, *iter);
}
break;
case IR::Opcode::VectorMultiplyUnsignedWiden32:
if (polyfill.vector_multiply_widen) {
PolyfillVectorMultiplyWiden<32, false>(ir, inst);
PolyfillVectorMultiplyWiden<32, false>(ir, *iter);
}
break;
default: