[dynarmic] remove memory pool (standard malloc does a better job)
parent fc6f9de3fa
commit ab631e6b28

16 changed files with 85 additions and 264 deletions
@@ -53,8 +53,6 @@ add_library(dynarmic
     common/lut_from_list.h
     common/math_util.cpp
     common/math_util.h
-    common/memory_pool.cpp
-    common/memory_pool.h
     common/safe_ops.h
     common/spin_lock.h
     common/string_util.h

@@ -153,6 +151,7 @@ if ("A64" IN_LIST DYNARMIC_FRONTENDS)
         ir/opt/a64_callback_config_pass.cpp
         ir/opt/a64_get_set_elimination_pass.cpp
         ir/opt/a64_merge_interpret_blocks.cpp
+        ir/opt/x64_peepholes.cpp
     )
 endif()
@@ -35,11 +35,6 @@ EmitContext::EmitContext(RegAlloc& reg_alloc, IR::Block& block)
 
 EmitContext::~EmitContext() = default;
 
-void EmitContext::EraseInstruction(IR::Inst* inst) {
-    block.Instructions().erase(inst);
-    inst->ClearArgs();
-}
-
 EmitX64::EmitX64(BlockOfCode& code)
         : code(code) {
     exception_handler.Register(code);
@@ -54,10 +54,7 @@ struct EmitContext {
     EmitContext(RegAlloc& reg_alloc, IR::Block& block);
     virtual ~EmitContext();
 
-    void EraseInstruction(IR::Inst* inst);
-
     virtual FP::FPCR FPCR(bool fpcr_controlled = true) const = 0;
-
     virtual bool HasOptimization(OptimizationFlag flag) const = 0;
 
     RegAlloc& reg_alloc;
@@ -40,7 +40,6 @@ static void EmitCRC32Castagnoli(BlockOfCode& code, EmitContext& ctx, IR::Inst* i
 
 static void EmitCRC32ISO(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, const int data_size) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-
     if (code.HasHostFeature(HostFeature::PCLMULQDQ) && data_size < 32) {
         const Xbyak::Reg32 crc = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
         const Xbyak::Reg64 value = ctx.reg_alloc.UseScratchGpr(args[1]);

@@ -69,10 +68,7 @@ static void EmitCRC32ISO(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, co
         code.pextrd(crc, xmm_value, 2);
-
         ctx.reg_alloc.DefineValue(inst, crc);
         return;
-    }
-
-    if (code.HasHostFeature(HostFeature::PCLMULQDQ) && data_size == 32) {
+    } else if (code.HasHostFeature(HostFeature::PCLMULQDQ) && data_size == 32) {
         const Xbyak::Reg32 crc = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
         const Xbyak::Reg32 value = ctx.reg_alloc.UseGpr(args[1]).cvt32();
         const Xbyak::Xmm xmm_value = ctx.reg_alloc.ScratchXmm();

@@ -90,10 +86,7 @@ static void EmitCRC32ISO(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, co
         code.pextrd(crc, xmm_value, 2);
-
         ctx.reg_alloc.DefineValue(inst, crc);
         return;
-    }
-
-    if (code.HasHostFeature(HostFeature::PCLMULQDQ) && data_size == 64) {
+    } else if (code.HasHostFeature(HostFeature::PCLMULQDQ) && data_size == 64) {
         const Xbyak::Reg32 crc = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
         const Xbyak::Reg64 value = ctx.reg_alloc.UseGpr(args[1]);
         const Xbyak::Xmm xmm_value = ctx.reg_alloc.ScratchXmm();

@@ -111,12 +104,11 @@ static void EmitCRC32ISO(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, co
         code.pextrd(crc, xmm_value, 2);
-
         ctx.reg_alloc.DefineValue(inst, crc);
         return;
-    }
-
-    ctx.reg_alloc.HostCall(inst, args[0], args[1], {});
-    code.mov(code.ABI_PARAM3, data_size / CHAR_BIT);
-    code.CallFunction(&CRC32::ComputeCRC32ISO);
+    } else {
+        ctx.reg_alloc.HostCall(inst, args[0], args[1], {});
+        code.mov(code.ABI_PARAM3, data_size / CHAR_BIT);
+        code.CallFunction(&CRC32::ComputeCRC32ISO);
+    }
 }
 
 void EmitX64::EmitCRC32Castagnoli8(EmitContext& ctx, IR::Inst* inst) {
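For reference, the fallback path's CRC32::ComputeCRC32ISO computes the standard reflected CRC-32 (polynomial 0xEDB88320) that the PCLMULQDQ branches accelerate. A minimal bitwise sketch under an assumed (crc, value, length-in-bytes) contract mirroring the HostCall/ABI_PARAM3 setup above; the name and signature here are illustrative rather than the project's exact ones, and initial/final XOR conventions are left to the caller:

    #include <cstdint>

    // Reflected CRC-32 update over the `length` low-order bytes of `value`.
    static std::uint32_t ComputeCRC32ISO_Sketch(std::uint32_t crc, std::uint64_t value, int length) {
        for (int i = 0; i < length; ++i) {
            crc ^= static_cast<std::uint8_t>(value >> (8 * i));
            for (int bit = 0; bit < 8; ++bit)
                crc = (crc >> 1) ^ ((crc & 1u) ? 0xEDB88320u : 0u);
        }
        return crc;
    }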
@@ -236,23 +236,19 @@ void DenormalsAreZero(BlockOfCode& code, FP::FPCR fpcr, std::initializer_list<Xb
             FpFixup::Norm_Src,
             FpFixup::Norm_Src,
             FpFixup::Norm_Src,
-            FpFixup::Norm_Src);
-
+            FpFixup::Norm_Src
+        );
         FCODE(vmovap)(tmp, code.BConst<fsize>(xword, denormal_to_zero));
-
-        for (const Xbyak::Xmm& xmm : to_daz) {
+        for (const Xbyak::Xmm& xmm : to_daz)
             FCODE(vfixupimmp)(xmm, xmm, tmp, u8(0));
-        }
-        return;
-    }
-
-    if (fpcr.RMode() != FP::RoundingMode::TowardsMinusInfinity) {
-        code.movaps(tmp, GetNegativeZeroVector<fsize>(code));
-    } else {
-        code.xorps(tmp, tmp);
-    }
-    for (const Xbyak::Xmm& xmm : to_daz) {
-        FCODE(addp)(xmm, tmp);
+    } else {
+        if (fpcr.RMode() != FP::RoundingMode::TowardsMinusInfinity) {
+            code.movaps(tmp, GetNegativeZeroVector<fsize>(code));
+        } else {
+            code.xorps(tmp, tmp);
+        }
+        for (const Xbyak::Xmm& xmm : to_daz)
+            FCODE(addp)(xmm, tmp);
     }
 }
 
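A scalar model of what both branches implement: denormal inputs collapse to zero while everything else passes through (the AVX-512 path apparently via a VFIXUPIMM table mapping denormals to zero, the fallback presumably via an add of +/-0.0 with the host's flush-to-zero state configured elsewhere; the zero's sign is picked so signed zeros survive the active rounding mode). Purely illustrative:

    #include <cmath>

    // Denormals-are-zero, scalar sketch: subnormals flush to a zero of the
    // same sign; normals, zeros, infinities and NaNs pass through untouched.
    template<typename F>
    F FlushDenormalToZero(F x) {
        return std::fpclassify(x) == FP_SUBNORMAL ? std::copysign(F(0), x) : x;
    }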
@@ -1,13 +0,0 @@
-/* This file is part of the dynarmic project.
- * Copyright (c) 2016 MerryMage
- * SPDX-License-Identifier: 0BSD
- */
-
-#include "dynarmic/common/memory_pool.h"
-
-#include <cstdlib>
-
-namespace Dynarmic::Common {
-
-
-} // namespace Dynarmic::Common
@@ -1,61 +0,0 @@
-/* This file is part of the dynarmic project.
- * Copyright (c) 2016 MerryMage
- * SPDX-License-Identifier: 0BSD
- */
-
-#pragma once
-
-#include <cstddef>
-#include <vector>
-
-namespace Dynarmic::Common {
-
-/// @tparam object_size Byte-size of objects to construct
-/// @tparam slab_size Number of objects to have per slab
-template<size_t object_size, size_t slab_size>
-class Pool {
-public:
-    inline Pool() noexcept {
-        AllocateNewSlab();
-    }
-    inline ~Pool() noexcept {
-        std::free(current_slab);
-        for (char* slab : slabs) {
-            std::free(slab);
-        }
-    }
-
-    Pool(const Pool&) = delete;
-    Pool(Pool&&) = delete;
-
-    Pool& operator=(const Pool&) = delete;
-    Pool& operator=(Pool&&) = delete;
-
-    /// @brief Returns a pointer to an `object_size`-bytes block of memory.
-    [[nodiscard]] void* Alloc() noexcept {
-        if (remaining == 0) {
-            slabs.push_back(current_slab);
-            AllocateNewSlab();
-        }
-        void* ret = static_cast<void*>(current_ptr);
-        current_ptr += object_size;
-        remaining--;
-        return ret;
-    }
-private:
-    /// @brief Allocates a completely new memory slab.
-    /// Used when an entirely new slab is needed
-    /// due the current one running out of usable space.
-    void AllocateNewSlab() noexcept {
-        current_slab = static_cast<char*>(std::malloc(object_size * slab_size));
-        current_ptr = current_slab;
-        remaining = slab_size;
-    }
-
-    std::vector<char*> slabs;
-    char* current_slab = nullptr;
-    char* current_ptr = nullptr;
-    size_t remaining = 0;
-};
-
-} // namespace Dynarmic::Common
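The deleted allocator was a bump-pointer slab pool: Alloc() hands out fixed-size slots from malloc'd slabs and never frees individual objects, only whole slabs at destruction. A usage sketch of the removed class (the Node type and slab count are illustrative):

    #include <new>  // placement new

    struct Node { int payload; };

    // 4096 fixed-size slots per slab; Alloc() is an O(1) pointer bump.
    Dynarmic::Common::Pool<sizeof(Node), 4096> pool;

    Node* MakeNode(int v) {
        void* slot = pool.Alloc();   // no per-allocation header, no free list
        return new (slot) Node{v};   // construct in place
    }
    // Destructors never run; all slabs are released wholesale when `pool` dies.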
@@ -13,7 +13,6 @@
 #include <fmt/format.h>
 #include <mcl/assert.hpp>
 
-#include "dynarmic/common/memory_pool.h"
 #include "dynarmic/frontend/A32/a32_types.h"
 #include "dynarmic/frontend/A64/a64_types.h"
 #include "dynarmic/ir/cond.h"

@@ -24,8 +23,7 @@ namespace Dynarmic::IR {
 Block::Block(const LocationDescriptor& location)
         : location{location},
           end_location{location},
-          cond{Cond::AL},
-          instruction_alloc_pool{std::make_unique<std::remove_reference_t<decltype(*instruction_alloc_pool)>>()}
+          cond{Cond::AL}
 {
 
 }

@@ -37,7 +35,7 @@ Block::Block(const LocationDescriptor& location)
 /// @param args A sequence of Value instances used as arguments for the instruction.
 /// @returns Iterator to the newly created instruction.
 Block::iterator Block::PrependNewInst(iterator insertion_point, Opcode opcode, std::initializer_list<Value> args) noexcept {
-    IR::Inst* inst = new (instruction_alloc_pool->Alloc()) IR::Inst(opcode);
+    IR::Inst* inst = new IR::Inst(opcode);
     DEBUG_ASSERT(args.size() == inst->NumArgs());
     std::for_each(args.begin(), args.end(), [&inst, index = size_t(0)](const auto& arg) mutable {
         inst->SetArg(index, arg);

@@ -83,9 +81,7 @@ static std::string TerminalToString(const Terminal& terminal_variant) noexcept {
 }
 
 std::string DumpBlock(const IR::Block& block) noexcept {
-    std::string ret;
-
-    ret += fmt::format("Block: location={}\n", block.Location());
+    std::string ret = fmt::format("Block: location={}\n", block.Location());
     ret += fmt::format("cycles={}", block.CycleCount());
     ret += fmt::format(", entry_cond={}", A64::CondToString(block.GetCondition()));
     if (block.GetCondition() != Cond::AL) {

@@ -113,6 +109,8 @@ std::string DumpBlock(const IR::Block& block) noexcept {
             return fmt::format("#{:#x}", arg.GetU32());
         case Type::U64:
             return fmt::format("#{:#x}", arg.GetU64());
+        case Type::U128:
+            return fmt::format("#<u128>");
         case Type::A32Reg:
             return A32::RegToString(arg.GetA32RegRef());
         case Type::A32ExtReg:

@@ -155,14 +153,9 @@ std::string DumpBlock(const IR::Block& block) noexcept {
                 ret += fmt::format("<type error: {} != {}>", GetNameOf(actual_type), GetNameOf(expected_type));
             }
         }
-
-        ret += fmt::format(" (uses: {})", inst.UseCount());
-
-        ret += '\n';
+        ret += fmt::format(" (uses: {})\n", inst.UseCount());
     }
-
     ret += "terminal = " + TerminalToString(block.GetTerminal()) + '\n';
-
     return ret;
 }
 
@@ -17,8 +17,6 @@
 #include "dynarmic/ir/microinstruction.h"
 #include "dynarmic/ir/terminal.h"
 #include "dynarmic/ir/value.h"
-#include "dynarmic/ir/dense_list.h"
-#include "dynarmic/common/memory_pool.h"
 
 namespace Dynarmic::IR {
 

@@ -76,7 +74,7 @@ public:
     /// @param op Opcode representing the instruction to add.
     /// @param args A sequence of Value instances used as arguments for the instruction.
     inline void AppendNewInst(const Opcode opcode, const std::initializer_list<IR::Value> args) noexcept {
-        PrependNewInst(end(), opcode, args);
+        PrependNewInst(instructions.end(), opcode, args);
     }
     iterator PrependNewInst(iterator insertion_point, Opcode op, std::initializer_list<Value> args) noexcept;
 

@@ -171,8 +169,6 @@ private:
     LocationDescriptor end_location;
     /// Conditional to pass in order to execute this block
     Cond cond;
-    /// Memory pool for instruction list
-    std::unique_ptr<Common::Pool<sizeof(Inst), 2097152UL / sizeof(Inst)>> instruction_alloc_pool;
    /// Terminal instruction of this block.
    Terminal terminal = Term::Invalid{};
    /// Number of cycles this block takes to execute if the conditional fails.
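With the pool member gone from Block, the plain `new IR::Inst(opcode)` in PrependNewInst makes every instruction an individually owned heap object, so teardown has to delete instructions one by one instead of dropping slabs. A hypothetical sketch of the implied teardown; Block's destructor is not part of this diff, and the container calls here are assumptions:

    // Hypothetical only: releasing individually new-ed instructions.
    Block::~Block() {
        for (auto it = instructions.begin(); it != instructions.end();) {
            IR::Inst* inst = &*it;
            it = instructions.erase(it);  // unlink first,
            delete inst;                  // then free, matching the plain `new`
        }
    }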
externals/dynarmic/src/dynarmic/ir/dense_list.h (vendored, 58 lines)

@@ -1,58 +0,0 @@
-#pragma once
-
-#include <cstdint>
-#include <cstddef>
-#include <deque>
-
-namespace Dynarmic {
-template<typename T> struct dense_list {
-    using difference_type = std::ptrdiff_t;
-    using size_type = std::size_t;
-    using value_type = T;
-    using pointer = value_type*;
-    using const_pointer = const value_type*;
-    using reference = value_type&;
-    using const_reference = const value_type&;
-    using iterator = std::deque<value_type>::iterator;
-    using const_iterator = std::deque<value_type>::const_iterator;
-    using reverse_iterator = std::reverse_iterator<iterator>;
-    using const_reverse_iterator = std::reverse_iterator<const_iterator>;
-
-    inline bool empty() const noexcept { return list.empty(); }
-    inline size_type size() const noexcept { return list.size(); }
-
-    inline value_type& front() noexcept { return list.front(); }
-    inline const value_type& front() const noexcept { return list.front(); }
-
-    inline value_type& back() noexcept { return list.back(); }
-    inline const value_type& back() const noexcept { return list.back(); }
-
-    inline iterator begin() noexcept { return list.begin(); }
-    inline const_iterator begin() const noexcept { return list.begin(); }
-    inline iterator end() noexcept { return list.end(); }
-    inline const_iterator end() const noexcept { return list.end(); }
-
-    inline reverse_iterator rbegin() noexcept { return list.rbegin(); }
-    inline const_reverse_iterator rbegin() const noexcept { return list.rbegin(); }
-    inline reverse_iterator rend() noexcept { return list.rend(); }
-    inline const_reverse_iterator rend() const noexcept { return list.rend(); }
-
-    inline const_iterator cbegin() const noexcept { return list.cbegin(); }
-    inline const_iterator cend() const noexcept { return list.cend(); }
-
-    inline const_reverse_iterator crbegin() const noexcept { return list.crbegin(); }
-    inline const_reverse_iterator crend() const noexcept { return list.crend(); }
-
-    inline iterator insert_before(iterator it, value_type& value) noexcept {
-        if (it == list.begin()) {
-            list.push_front(value);
-            return list.begin();
-        }
-        auto const index = std::distance(list.begin(), it - 1);
-        list.insert(it - 1, value);
-        return list.begin() + index;
-    }
-
-    std::deque<value_type> list;
-
-};
-}
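While it is on its way out, note that the removed insert_before looks off for mid-list inserts: `list.insert(it - 1, value)` places the element before the previous entry, and the index taken from `it - 1` does not point at the inserted slot. A corrected sketch of the presumable intent (std::deque::insert already inserts before the given iterator, including begin(), and returns an iterator to the new element):

    // Insert immediately before `it`; return an iterator to the new element.
    iterator insert_before(iterator it, value_type& value) noexcept {
        return list.insert(it, value);
    }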
@@ -2947,19 +2947,10 @@ public:
         block.SetTerminal(terminal);
     }
 
-    void SetInsertionPointBefore(IR::Inst* new_insertion_point) {
-        insertion_point = IR::Block::iterator{*new_insertion_point};
-    }
-
     void SetInsertionPointBefore(IR::Block::iterator new_insertion_point) {
         insertion_point = new_insertion_point;
     }
 
-    void SetInsertionPointAfter(IR::Inst* new_insertion_point) {
-        insertion_point = IR::Block::iterator{*new_insertion_point};
-        ++insertion_point;
-    }
-
     void SetInsertionPointAfter(IR::Block::iterator new_insertion_point) {
         insertion_point = new_insertion_point;
         ++insertion_point;
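Dropping the IR::Inst* overloads explains the call-site churn in the passes below: an insertion point is now always expressed as a block iterator. The migration is mechanical, sketched here with the names used by those passes:

    // Before (removed overload): position the emitter from a raw pointer.
    ir.SetInsertionPointBefore(&inst);

    // After: pass the iterator the surrounding loop already holds.
    ir.SetInsertionPointBefore(iter);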
@@ -22,8 +22,7 @@ namespace Dynarmic::Optimization {
 namespace {
 
 void FlagsPass(IR::Block& block) {
-    using Iterator = std::reverse_iterator<IR::Block::iterator>;
-
+    using Iterator = IR::Block::reverse_iterator;
     struct FlagInfo {
         bool set_not_required = false;
         bool has_value_request = false;

@@ -185,10 +184,10 @@ void RegisterPass(IR::Block& block) {
     using Iterator = IR::Block::iterator;
 
     struct RegInfo {
-        IR::Value register_value;
         std::optional<Iterator> last_set_instruction;
+        IR::Value register_value;
     };
-    std::array<RegInfo, 15> reg_info;
+    alignas(64) std::array<RegInfo, 15> reg_info;
 
     const auto do_get = [](RegInfo& info, Iterator get_inst) {
         if (info.register_value.IsEmpty()) {

@@ -203,12 +202,12 @@ void RegisterPass(IR::Block& block) {
             (*info.last_set_instruction)->Invalidate();
         }
         info = {
-            .register_value = value,
             .last_set_instruction = set_inst,
+            .register_value = value,
         };
     };
 
-    enum class ExtValueType {
+    enum class ExtValueType : std::uint8_t {
         Empty,
         Single,
         Double,

@@ -216,19 +215,20 @@ void RegisterPass(IR::Block& block) {
         VectorQuad,
     };
     struct ExtRegInfo {
-        ExtValueType value_type = {};
-        IR::Value register_value;
         std::optional<Iterator> last_set_instruction;
+        IR::Value register_value;
+        ExtValueType value_type = {};
     };
-    std::array<ExtRegInfo, 64> ext_reg_info;
+    // Max returned by RegNumber = 31 (but multiplied by 4 in some cases)
+    alignas(64) std::array<ExtRegInfo, 128> ext_reg_info;
 
     const auto do_ext_get = [](ExtValueType type, std::initializer_list<std::reference_wrapper<ExtRegInfo>> infos, Iterator get_inst) {
         if (!std::all_of(infos.begin(), infos.end(), [type](const auto& info) { return info.get().value_type == type; })) {
             for (auto& info : infos) {
                 info.get() = {
-                    .value_type = type,
                     .register_value = IR::Value(&*get_inst),
                     .last_set_instruction = std::nullopt,
+                    .value_type = type,
                 };
             }
             return;

@@ -244,9 +244,9 @@ void RegisterPass(IR::Block& block) {
         }
         for (auto& info : infos) {
             info.get() = {
-                .value_type = type,
                 .register_value = value,
                 .last_set_instruction = set_inst,
+                .value_type = type,
             };
         }
     };
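The member reordering, the uint8_t-backed enum, and alignas(64) are cache-layout tweaks: the hot lookup arrays now start on a cache-line boundary and the one-byte discriminant sits after the larger members. A minimal sketch of the idea, assuming 64-byte lines and stand-in member types (the actual win depends on the real IR::Value and iterator sizes):

    #include <array>
    #include <cstdint>

    enum class ExtValueType : std::uint8_t { Empty, Single, Double };

    struct ExtRegInfo {
        void* last_set_instruction;    // stand-ins for the IR types above
        void* register_value;
        ExtValueType value_type = {};  // 1-byte discriminant placed last
    };

    // Start the hot array on an assumed 64-byte cache-line boundary.
    alignas(64) std::array<ExtRegInfo, 128> ext_reg_info{};
    static_assert(alignof(decltype(ext_reg_info)) == 64);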
@@ -17,7 +17,8 @@ void A64CallbackConfigPass(IR::Block& block, const A64::UserConfig& conf) {
         return;
     }
 
-    for (auto& inst : block) {
+    for (auto iter = block.begin(); iter != block.end(); iter++) {
+        auto& inst = *iter;
         if (inst.GetOpcode() != IR::Opcode::A64DataCacheOperationRaised) {
             continue;
         }

@@ -26,7 +27,7 @@ void A64CallbackConfigPass(IR::Block& block, const A64::UserConfig& conf) {
         if (op == A64::DataCacheOperation::ZeroByVA) {
             A64::IREmitter ir{block};
             ir.current_location = A64::LocationDescriptor{IR::LocationDescriptor{inst.GetArg(0).GetU64()}};
-            ir.SetInsertionPointBefore(&inst);
+            ir.SetInsertionPointBefore(iter);
 
             size_t bytes = 4 << static_cast<size_t>(conf.dczid_el0 & 0b1111);
             IR::U64 addr{inst.GetArg(2)};
@@ -22,9 +22,9 @@ using Op = Dynarmic::IR::Opcode;
 
 namespace {
 
-// Tiny helper to avoid the need to store based off the opcode
-// bit size all over the place within folding functions.
-void ReplaceUsesWith(IR::Inst& inst, bool is_32_bit, u64 value) {
+/// Tiny helper to avoid the need to store based off the opcode
+/// bit size all over the place within folding functions.
+static void ReplaceUsesWith(IR::Inst& inst, bool is_32_bit, u64 value) {
     if (is_32_bit) {
         inst.ReplaceUsesWith(IR::Value{static_cast<u32>(value)});
     } else {

@@ -32,12 +32,12 @@ void ReplaceUsesWith(IR::Inst& inst, bool is_32_bit, u64 value) {
     }
 }
 
-IR::Value Value(bool is_32_bit, u64 value) {
+static IR::Value Value(bool is_32_bit, u64 value) {
     return is_32_bit ? IR::Value{static_cast<u32>(value)} : IR::Value{value};
 }
 
 template<typename ImmFn>
-bool FoldCommutative(IR::Inst& inst, bool is_32_bit, ImmFn imm_fn) {
+static bool FoldCommutative(IR::Inst& inst, bool is_32_bit, ImmFn imm_fn) {
     const auto lhs = inst.GetArg(0);
     const auto rhs = inst.GetArg(1);
 

@@ -75,7 +75,7 @@ bool FoldCommutative(IR::Inst& inst, bool is_32_bit, ImmFn imm_fn) {
     return true;
 }
 
-void FoldAdd(IR::Inst& inst, bool is_32_bit) {
+static void FoldAdd(IR::Inst& inst, bool is_32_bit) {
     const auto lhs = inst.GetArg(0);
     const auto rhs = inst.GetArg(1);
     const auto carry = inst.GetArg(2);

@@ -125,7 +125,7 @@ void FoldAdd(IR::Inst& inst, bool is_32_bit) {
 /// 4. x & y -> y (where x has all bits set to 1)
 /// 5. x & y -> x (where y has all bits set to 1)
 ///
-void FoldAND(IR::Inst& inst, bool is_32_bit) {
+static void FoldAND(IR::Inst& inst, bool is_32_bit) {
     if (FoldCommutative(inst, is_32_bit, [](u64 a, u64 b) { return a & b; })) {
         const auto rhs = inst.GetArg(1);
         if (rhs.IsZero()) {

@@ -140,7 +140,7 @@ void FoldAND(IR::Inst& inst, bool is_32_bit) {
 ///
 /// 1. imm -> swap(imm)
 ///
-void FoldByteReverse(IR::Inst& inst, Op op) {
+static void FoldByteReverse(IR::Inst& inst, Op op) {
     const auto operand = inst.GetArg(0);
 
     if (!operand.IsImmediate()) {

@@ -165,7 +165,7 @@ void FoldByteReverse(IR::Inst& inst, Op op) {
 /// 2. imm_x / imm_y -> result
 /// 3. x / 1 -> x
 ///
-void FoldDivide(IR::Inst& inst, bool is_32_bit, bool is_signed) {
+static void FoldDivide(IR::Inst& inst, bool is_32_bit, bool is_signed) {
     const auto rhs = inst.GetArg(1);
 
     if (rhs.IsZero()) {

@@ -193,7 +193,7 @@ void FoldDivide(IR::Inst& inst, bool is_32_bit, bool is_signed) {
 // 2. x ^ 0 -> x
 // 3. 0 ^ y -> y
 //
-void FoldEOR(IR::Inst& inst, bool is_32_bit) {
+static void FoldEOR(IR::Inst& inst, bool is_32_bit) {
     if (FoldCommutative(inst, is_32_bit, [](u64 a, u64 b) { return a ^ b; })) {
         const auto rhs = inst.GetArg(1);
         if (rhs.IsZero()) {

@@ -202,7 +202,7 @@
         }
     }
 }
 
-void FoldLeastSignificantByte(IR::Inst& inst) {
+static void FoldLeastSignificantByte(IR::Inst& inst) {
     if (!inst.AreAllArgsImmediates()) {
         return;
     }

@@ -211,7 +211,7 @@
     inst.ReplaceUsesWith(IR::Value{static_cast<u8>(operand.GetImmediateAsU64())});
 }
 
-void FoldLeastSignificantHalf(IR::Inst& inst) {
+static void FoldLeastSignificantHalf(IR::Inst& inst) {
     if (!inst.AreAllArgsImmediates()) {
         return;
     }

@@ -220,7 +220,7 @@
     inst.ReplaceUsesWith(IR::Value{static_cast<u16>(operand.GetImmediateAsU64())});
 }
 
-void FoldLeastSignificantWord(IR::Inst& inst) {
+static void FoldLeastSignificantWord(IR::Inst& inst) {
     if (!inst.AreAllArgsImmediates()) {
         return;
     }

@@ -229,7 +229,7 @@
     inst.ReplaceUsesWith(IR::Value{static_cast<u32>(operand.GetImmediateAsU64())});
 }
 
-void FoldMostSignificantBit(IR::Inst& inst) {
+static void FoldMostSignificantBit(IR::Inst& inst) {
     if (!inst.AreAllArgsImmediates()) {
         return;
     }

@@ -238,7 +238,7 @@
     inst.ReplaceUsesWith(IR::Value{(operand.GetImmediateAsU64() >> 31) != 0});
 }
 
-void FoldMostSignificantWord(IR::Inst& inst) {
+static void FoldMostSignificantWord(IR::Inst& inst) {
     IR::Inst* carry_inst = inst.GetAssociatedPseudoOperation(Op::GetCarryFromOp);
 
     if (!inst.AreAllArgsImmediates()) {

@@ -260,7 +260,7 @@
 // 4. x * 1 -> x
 // 5. 1 * y -> y
 //
-void FoldMultiply(IR::Inst& inst, bool is_32_bit) {
+static void FoldMultiply(IR::Inst& inst, bool is_32_bit) {
     if (FoldCommutative(inst, is_32_bit, [](u64 a, u64 b) { return a * b; })) {
         const auto rhs = inst.GetArg(1);
         if (rhs.IsZero()) {

@@ -272,7 +272,7 @@
 }
 
 // Folds NOT operations if the contained value is an immediate.
-void FoldNOT(IR::Inst& inst, bool is_32_bit) {
+static void FoldNOT(IR::Inst& inst, bool is_32_bit) {
     const auto operand = inst.GetArg(0);
 
     if (!operand.IsImmediate()) {

@@ -289,7 +289,7 @@
 // 2. x | 0 -> x
 // 3. 0 | y -> y
 //
-void FoldOR(IR::Inst& inst, bool is_32_bit) {
+static void FoldOR(IR::Inst& inst, bool is_32_bit) {
     if (FoldCommutative(inst, is_32_bit, [](u64 a, u64 b) { return a | b; })) {
         const auto rhs = inst.GetArg(1);
         if (rhs.IsZero()) {

@@ -298,7 +298,7 @@
     }
 }
 
-bool FoldShifts(IR::Inst& inst) {
+static bool FoldShifts(IR::Inst& inst) {
     IR::Inst* carry_inst = inst.GetAssociatedPseudoOperation(Op::GetCarryFromOp);
 
     // The 32-bit variants can contain 3 arguments, while the

@@ -328,7 +328,7 @@
     return true;
 }
 
-void FoldSignExtendXToWord(IR::Inst& inst) {
+static void FoldSignExtendXToWord(IR::Inst& inst) {
     if (!inst.AreAllArgsImmediates()) {
         return;
     }

@@ -337,7 +337,7 @@
     inst.ReplaceUsesWith(IR::Value{static_cast<u32>(value)});
 }
 
-void FoldSignExtendXToLong(IR::Inst& inst) {
+static void FoldSignExtendXToLong(IR::Inst& inst) {
     if (!inst.AreAllArgsImmediates()) {
         return;
     }

@@ -346,7 +346,7 @@
     inst.ReplaceUsesWith(IR::Value{static_cast<u64>(value)});
 }
 
-void FoldSub(IR::Inst& inst, bool is_32_bit) {
+static void FoldSub(IR::Inst& inst, bool is_32_bit) {
     if (!inst.AreAllArgsImmediates() || inst.HasAssociatedPseudoOperation()) {
         return;
     }

@@ -359,7 +359,7 @@
     ReplaceUsesWith(inst, is_32_bit, result);
 }
 
-void FoldZeroExtendXToWord(IR::Inst& inst) {
+static void FoldZeroExtendXToWord(IR::Inst& inst) {
     if (!inst.AreAllArgsImmediates()) {
         return;
     }

@@ -368,7 +368,7 @@
     inst.ReplaceUsesWith(IR::Value{static_cast<u32>(value)});
 }
 
-void FoldZeroExtendXToLong(IR::Inst& inst) {
+static void FoldZeroExtendXToLong(IR::Inst& inst) {
     if (!inst.AreAllArgsImmediates()) {
         return;
     }
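One note on this batch: every helper already sits in an anonymous namespace, which gives internal linkage by itself, so the added `static` changes nothing for the linker and only spells the intent out at each definition:

    namespace {

    static int HelperA() { return 1; }  // static + unnamed namespace (as in this diff)
    int HelperB() { return 2; }         // same internal linkage without `static`

    } // namespace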
@@ -13,11 +13,8 @@ namespace Dynarmic::Optimization {
 
 void IdentityRemovalPass(IR::Block& block) {
     std::vector<IR::Inst*> to_invalidate;
-
-    auto iter = block.begin();
-    while (iter != block.end()) {
+    for (auto iter = block.begin(); iter != block.end(); ) {
         IR::Inst& inst = *iter;
-
         const size_t num_args = inst.NumArgs();
         for (size_t i = 0; i < num_args; i++) {
             while (true) {

@@ -27,18 +24,15 @@ void IdentityRemovalPass(IR::Block& block) {
                 inst.SetArg(i, arg.GetInst()->GetArg(0));
             }
         }
-
         if (inst.GetOpcode() == IR::Opcode::Identity || inst.GetOpcode() == IR::Opcode::Void) {
-            iter = block.Instructions().erase(inst);
+            iter = block.Instructions().erase(iter);
             to_invalidate.push_back(&inst);
         } else {
             ++iter;
         }
     }
-
-    for (IR::Inst* inst : to_invalidate) {
+    for (auto* inst : to_invalidate)
         inst->Invalidate();
-    }
 }
 
 } // namespace Dynarmic::Optimization
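The rewritten loop is the standard erase-while-iterating idiom: erase() hands back the iterator to the next element, so the pass advances without ever touching an invalidated iterator, and Invalidate() stays deferred until the walk is done. Generic shape of the idiom (container and predicate are illustrative):

    // Erase-while-iterating: advance via erase()'s return value.
    for (auto it = container.begin(); it != container.end();) {
        if (ShouldRemove(*it))
            it = container.erase(it);  // iterator to the next element
        else
            ++it;
    }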
@@ -13,7 +13,7 @@ namespace Dynarmic::Optimization {
 
 namespace {
 
-void PolyfillSHA256MessageSchedule0(IR::IREmitter& ir, IR::Inst& inst) {
+static void PolyfillSHA256MessageSchedule0(IR::IREmitter& ir, IR::Inst& inst) {
     const IR::U128 x = (IR::U128)inst.GetArg(0);
     const IR::U128 y = (IR::U128)inst.GetArg(1);
 

@@ -37,13 +37,14 @@ void PolyfillSHA256MessageSchedule0(IR::IREmitter& ir, IR::Inst& inst) {
     inst.ReplaceUsesWith(result);
 }
 
-void PolyfillSHA256MessageSchedule1(IR::IREmitter& ir, IR::Inst& inst) {
+static void PolyfillSHA256MessageSchedule1(IR::IREmitter& ir, IR::Inst& inst) {
     const IR::U128 x = (IR::U128)inst.GetArg(0);
     const IR::U128 y = (IR::U128)inst.GetArg(1);
     const IR::U128 z = (IR::U128)inst.GetArg(2);
 
     const IR::U128 T0 = ir.VectorExtract(y, z, 32);
 
+    // TODO: this can use better pipelining m8
     const IR::U128 lower_half = [&] {
         const IR::U128 T = ir.VectorRotateWholeVectorRight(z, 64);
         const IR::U128 tmp1 = ir.VectorRotateRight(32, T, 17);

@@ -73,15 +74,15 @@ void PolyfillSHA256MessageSchedule1(IR::IREmitter& ir, IR::Inst& inst) {
     inst.ReplaceUsesWith(result);
 }
 
-IR::U32 SHAchoose(IR::IREmitter& ir, IR::U32 x, IR::U32 y, IR::U32 z) {
+static IR::U32 SHAchoose(IR::IREmitter& ir, IR::U32 x, IR::U32 y, IR::U32 z) {
     return ir.Eor(ir.And(ir.Eor(y, z), x), z);
 }
 
-IR::U32 SHAmajority(IR::IREmitter& ir, IR::U32 x, IR::U32 y, IR::U32 z) {
+static IR::U32 SHAmajority(IR::IREmitter& ir, IR::U32 x, IR::U32 y, IR::U32 z) {
     return ir.Or(ir.And(x, y), ir.And(ir.Or(x, y), z));
 }
 
-IR::U32 SHAhashSIGMA0(IR::IREmitter& ir, IR::U32 x) {
+static IR::U32 SHAhashSIGMA0(IR::IREmitter& ir, IR::U32 x) {
     const IR::U32 tmp1 = ir.RotateRight(x, ir.Imm8(2));
     const IR::U32 tmp2 = ir.RotateRight(x, ir.Imm8(13));
     const IR::U32 tmp3 = ir.RotateRight(x, ir.Imm8(22));

@@ -89,7 +90,7 @@ IR::U32 SHAhashSIGMA0(IR::IREmitter& ir, IR::U32 x) {
     return ir.Eor(tmp1, ir.Eor(tmp2, tmp3));
 }
 
-IR::U32 SHAhashSIGMA1(IR::IREmitter& ir, IR::U32 x) {
+static IR::U32 SHAhashSIGMA1(IR::IREmitter& ir, IR::U32 x) {
     const IR::U32 tmp1 = ir.RotateRight(x, ir.Imm8(6));
     const IR::U32 tmp2 = ir.RotateRight(x, ir.Imm8(11));
     const IR::U32 tmp3 = ir.RotateRight(x, ir.Imm8(25));

@@ -97,7 +98,7 @@ IR::U32 SHAhashSIGMA1(IR::IREmitter& ir, IR::U32 x) {
     return ir.Eor(tmp1, ir.Eor(tmp2, tmp3));
 }
 
-void PolyfillSHA256Hash(IR::IREmitter& ir, IR::Inst& inst) {
+static void PolyfillSHA256Hash(IR::IREmitter& ir, IR::Inst& inst) {
     IR::U128 x = (IR::U128)inst.GetArg(0);
     IR::U128 y = (IR::U128)inst.GetArg(1);
     const IR::U128 w = (IR::U128)inst.GetArg(2);

@@ -139,7 +140,7 @@ void PolyfillSHA256Hash(IR::IREmitter& ir, IR::Inst& inst) {
 }
 
 template<size_t esize, bool is_signed>
-void PolyfillVectorMultiplyWiden(IR::IREmitter& ir, IR::Inst& inst) {
+static void PolyfillVectorMultiplyWiden(IR::IREmitter& ir, IR::Inst& inst) {
     IR::U128 n = (IR::U128)inst.GetArg(0);
     IR::U128 m = (IR::U128)inst.GetArg(1);
 

@@ -159,54 +160,52 @@ void PolyfillPass(IR::Block& block, const PolyfillOptions& polyfill) {
     }
 
     IR::IREmitter ir{block};
 
-    for (auto& inst : block) {
-        ir.SetInsertionPointBefore(&inst);
-
-        switch (inst.GetOpcode()) {
+    for (auto iter = block.begin(); iter != block.end(); iter++) {
+        ir.SetInsertionPointBefore(iter);
+        switch (iter->GetOpcode()) {
         case IR::Opcode::SHA256MessageSchedule0:
             if (polyfill.sha256) {
-                PolyfillSHA256MessageSchedule0(ir, inst);
+                PolyfillSHA256MessageSchedule0(ir, *iter);
             }
             break;
         case IR::Opcode::SHA256MessageSchedule1:
             if (polyfill.sha256) {
-                PolyfillSHA256MessageSchedule1(ir, inst);
+                PolyfillSHA256MessageSchedule1(ir, *iter);
             }
             break;
         case IR::Opcode::SHA256Hash:
             if (polyfill.sha256) {
-                PolyfillSHA256Hash(ir, inst);
+                PolyfillSHA256Hash(ir, *iter);
             }
             break;
         case IR::Opcode::VectorMultiplySignedWiden8:
             if (polyfill.vector_multiply_widen) {
-                PolyfillVectorMultiplyWiden<8, true>(ir, inst);
+                PolyfillVectorMultiplyWiden<8, true>(ir, *iter);
             }
             break;
         case IR::Opcode::VectorMultiplySignedWiden16:
             if (polyfill.vector_multiply_widen) {
-                PolyfillVectorMultiplyWiden<16, true>(ir, inst);
+                PolyfillVectorMultiplyWiden<16, true>(ir, *iter);
             }
             break;
        case IR::Opcode::VectorMultiplySignedWiden32:
            if (polyfill.vector_multiply_widen) {
-                PolyfillVectorMultiplyWiden<32, true>(ir, inst);
+                PolyfillVectorMultiplyWiden<32, true>(ir, *iter);
            }
            break;
        case IR::Opcode::VectorMultiplyUnsignedWiden8:
            if (polyfill.vector_multiply_widen) {
-                PolyfillVectorMultiplyWiden<8, false>(ir, inst);
+                PolyfillVectorMultiplyWiden<8, false>(ir, *iter);
            }
            break;
        case IR::Opcode::VectorMultiplyUnsignedWiden16:
            if (polyfill.vector_multiply_widen) {
-                PolyfillVectorMultiplyWiden<16, false>(ir, inst);
+                PolyfillVectorMultiplyWiden<16, false>(ir, *iter);
            }
            break;
        case IR::Opcode::VectorMultiplyUnsignedWiden32:
            if (polyfill.vector_multiply_widen) {
-                PolyfillVectorMultiplyWiden<32, false>(ir, inst);
+                PolyfillVectorMultiplyWiden<32, false>(ir, *iter);
            }
            break;
        default:
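An aside on the SHA helpers made static above: SHAchoose computes the FIPS 180-4 "choose" function Ch(x, y, z) = (x & y) ^ (~x & z) in its reduced form z ^ (x & (y ^ z)), and SHAmajority computes Maj(x, y, z) = (x & y) ^ (x & z) ^ (y & z) as (x & y) | ((x | y) & z); both rewrites are standard bitwise identities. A plain-integer spot check:

    #include <cassert>
    #include <cstdint>

    using u32 = std::uint32_t;

    static u32 ChRef(u32 x, u32 y, u32 z)  { return (x & y) ^ (~x & z); }
    static u32 ChOpt(u32 x, u32 y, u32 z)  { return z ^ (x & (y ^ z)); }       // SHAchoose's form
    static u32 MajRef(u32 x, u32 y, u32 z) { return (x & y) ^ (x & z) ^ (y & z); }
    static u32 MajOpt(u32 x, u32 y, u32 z) { return (x & y) | ((x | y) & z); }  // SHAmajority's form

    int main() {
        for (u32 x : {0u, 0xFFFFFFFFu, 0xDEADBEEFu})
            for (u32 y : {0u, 0xFFFFFFFFu, 0x12345678u})
                for (u32 z : {0u, 0xFFFFFFFFu, 0x0F0F0F0Fu}) {
                    assert(ChRef(x, y, z) == ChOpt(x, y, z));
                    assert(MajRef(x, y, z) == MajOpt(x, y, z));
                }
    }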