[dynarmic] remove memory pool (standard malloc does a better job)
This commit is contained in:
  parent 3164b05670
  commit 2710ea7d8a

16 changed files with 85 additions and 264 deletions
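The change in a nutshell: each IR::Block used to own a slab-backed Common::Pool and construct its IR::Inst objects into it with placement new; this commit deletes the pool and allocates instructions with plain operator new, on the theory stated in the title that a modern general-purpose malloc already does this job well. A minimal sketch of the two allocation paths, using stand-in types rather than dynarmic's own (the real before/after lines appear in the Block::PrependNewInst hunk further down):

    #include <new>

    struct Inst {
        explicit Inst(int opcode) : opcode(opcode) {}
        int opcode;
    };

    // Before: storage comes from a bump-pointer slab pool. Objects are not
    // freed individually; the whole pool is released at once.
    Inst* AllocFromPool(void* slot, int opcode) {
        return new (slot) Inst(opcode);  // placement new into pool memory
    }

    // After: storage comes from the global allocator, whose size classes and
    // thread-local caches make small fixed-size allocations cheap.
    Inst* AllocFromHeap(int opcode) {
        return new Inst(opcode);  // matched by a per-object delete
    }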
@@ -53,8 +53,6 @@ add_library(dynarmic
     common/lut_from_list.h
     common/math_util.cpp
     common/math_util.h
-    common/memory_pool.cpp
-    common/memory_pool.h
     common/safe_ops.h
     common/spin_lock.h
     common/string_util.h

@@ -153,6 +151,7 @@ if ("A64" IN_LIST DYNARMIC_FRONTENDS)
         ir/opt/a64_callback_config_pass.cpp
         ir/opt/a64_get_set_elimination_pass.cpp
         ir/opt/a64_merge_interpret_blocks.cpp
+        ir/opt/x64_peepholes.cpp
     )
 endif()
 
@@ -35,11 +35,6 @@ EmitContext::EmitContext(RegAlloc& reg_alloc, IR::Block& block)
 
 EmitContext::~EmitContext() = default;
 
-void EmitContext::EraseInstruction(IR::Inst* inst) {
-    block.Instructions().erase(inst);
-    inst->ClearArgs();
-}
-
 EmitX64::EmitX64(BlockOfCode& code)
         : code(code) {
     exception_handler.Register(code);
@@ -54,10 +54,7 @@ struct EmitContext {
     EmitContext(RegAlloc& reg_alloc, IR::Block& block);
     virtual ~EmitContext();
 
-    void EraseInstruction(IR::Inst* inst);
-
     virtual FP::FPCR FPCR(bool fpcr_controlled = true) const = 0;
-
     virtual bool HasOptimization(OptimizationFlag flag) const = 0;
 
     RegAlloc& reg_alloc;
@@ -40,7 +40,6 @@ static void EmitCRC32Castagnoli(BlockOfCode& code, EmitContext& ctx, IR::Inst* i
 
 static void EmitCRC32ISO(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, const int data_size) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-
     if (code.HasHostFeature(HostFeature::PCLMULQDQ) && data_size < 32) {
         const Xbyak::Reg32 crc = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
         const Xbyak::Reg64 value = ctx.reg_alloc.UseScratchGpr(args[1]);

@@ -69,10 +68,7 @@ static void EmitCRC32ISO(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, co
         code.pextrd(crc, xmm_value, 2);
 
         ctx.reg_alloc.DefineValue(inst, crc);
-        return;
-    }
-
-    if (code.HasHostFeature(HostFeature::PCLMULQDQ) && data_size == 32) {
+    } else if (code.HasHostFeature(HostFeature::PCLMULQDQ) && data_size == 32) {
         const Xbyak::Reg32 crc = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
         const Xbyak::Reg32 value = ctx.reg_alloc.UseGpr(args[1]).cvt32();
         const Xbyak::Xmm xmm_value = ctx.reg_alloc.ScratchXmm();

@@ -90,10 +86,7 @@ static void EmitCRC32ISO(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, co
         code.pextrd(crc, xmm_value, 2);
 
         ctx.reg_alloc.DefineValue(inst, crc);
-        return;
-    }
-
-    if (code.HasHostFeature(HostFeature::PCLMULQDQ) && data_size == 64) {
+    } else if (code.HasHostFeature(HostFeature::PCLMULQDQ) && data_size == 64) {
         const Xbyak::Reg32 crc = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
         const Xbyak::Reg64 value = ctx.reg_alloc.UseGpr(args[1]);
         const Xbyak::Xmm xmm_value = ctx.reg_alloc.ScratchXmm();

@@ -111,12 +104,11 @@ static void EmitCRC32ISO(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, co
         code.pextrd(crc, xmm_value, 2);
 
         ctx.reg_alloc.DefineValue(inst, crc);
-        return;
-    }
-
-    ctx.reg_alloc.HostCall(inst, args[0], args[1], {});
-    code.mov(code.ABI_PARAM3, data_size / CHAR_BIT);
-    code.CallFunction(&CRC32::ComputeCRC32ISO);
+    } else {
+        ctx.reg_alloc.HostCall(inst, args[0], args[1], {});
+        code.mov(code.ABI_PARAM3, data_size / CHAR_BIT);
+        code.CallFunction(&CRC32::ComputeCRC32ISO);
+    }
 }
 
 void EmitX64::EmitCRC32Castagnoli8(EmitContext& ctx, IR::Inst* inst) {
@@ -236,23 +236,19 @@ void DenormalsAreZero(BlockOfCode& code, FP::FPCR fpcr, std::initializer_list<Xb
                                                FpFixup::Norm_Src,
                                                FpFixup::Norm_Src,
                                                FpFixup::Norm_Src,
-                                               FpFixup::Norm_Src);
+                                               FpFixup::Norm_Src
+        );
         FCODE(vmovap)(tmp, code.BConst<fsize>(xword, denormal_to_zero));
-        for (const Xbyak::Xmm& xmm : to_daz) {
+        for (const Xbyak::Xmm& xmm : to_daz)
             FCODE(vfixupimmp)(xmm, xmm, tmp, u8(0));
-        }
-        return;
-    }
-
-    if (fpcr.RMode() != FP::RoundingMode::TowardsMinusInfinity) {
-        code.movaps(tmp, GetNegativeZeroVector<fsize>(code));
     } else {
-        code.xorps(tmp, tmp);
+        if (fpcr.RMode() != FP::RoundingMode::TowardsMinusInfinity) {
+            code.movaps(tmp, GetNegativeZeroVector<fsize>(code));
+        } else {
+            code.xorps(tmp, tmp);
+        }
+        for (const Xbyak::Xmm& xmm : to_daz)
+            FCODE(addp)(xmm, tmp);
     }
-    for (const Xbyak::Xmm& xmm : to_daz) {
-        FCODE(addp)(xmm, tmp);
-    }
 }
 
@@ -1,13 +0,0 @@
-/* This file is part of the dynarmic project.
- * Copyright (c) 2016 MerryMage
- * SPDX-License-Identifier: 0BSD
- */
-
-#include "dynarmic/common/memory_pool.h"
-
-#include <cstdlib>
-
-namespace Dynarmic::Common {
-
-
-} // namespace Dynarmic::Common
@@ -1,61 +0,0 @@
-/* This file is part of the dynarmic project.
- * Copyright (c) 2016 MerryMage
- * SPDX-License-Identifier: 0BSD
- */
-
-#pragma once
-
-#include <cstddef>
-#include <vector>
-
-namespace Dynarmic::Common {
-
-/// @tparam object_size Byte-size of objects to construct
-/// @tparam slab_size Number of objects to have per slab
-template<size_t object_size, size_t slab_size>
-class Pool {
-public:
-    inline Pool() noexcept {
-        AllocateNewSlab();
-    }
-    inline ~Pool() noexcept {
-        std::free(current_slab);
-        for (char* slab : slabs) {
-            std::free(slab);
-        }
-    }
-
-    Pool(const Pool&) = delete;
-    Pool(Pool&&) = delete;
-
-    Pool& operator=(const Pool&) = delete;
-    Pool& operator=(Pool&&) = delete;
-
-    /// @brief Returns a pointer to an `object_size`-bytes block of memory.
-    [[nodiscard]] void* Alloc() noexcept {
-        if (remaining == 0) {
-            slabs.push_back(current_slab);
-            AllocateNewSlab();
-        }
-        void* ret = static_cast<void*>(current_ptr);
-        current_ptr += object_size;
-        remaining--;
-        return ret;
-    }
-
-private:
-    /// @brief Allocates a completely new memory slab.
-    /// Used when an entirely new slab is needed
-    /// due the current one running out of usable space.
-    void AllocateNewSlab() noexcept {
-        current_slab = static_cast<char*>(std::malloc(object_size * slab_size));
-        current_ptr = current_slab;
-        remaining = slab_size;
-    }
-
-    std::vector<char*> slabs;
-    char* current_slab = nullptr;
-    char* current_ptr = nullptr;
-    size_t remaining = 0;
-};
-
-} // namespace Dynarmic::Common
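For context, this is roughly how the deleted Pool was consumed before this commit (a hedged reconstruction from the Block hunks below, where the slab parameter sizes one slab at about 2 MiB of Inst storage):

    // Sketch, not a line-for-line copy of the old code.
    using InstPool = Dynarmic::Common::Pool<sizeof(IR::Inst), 2097152UL / sizeof(IR::Inst)>;

    std::unique_ptr<InstPool> instruction_alloc_pool = std::make_unique<InstPool>();
    IR::Inst* inst = new (instruction_alloc_pool->Alloc()) IR::Inst(opcode);

Note that Pool::~Pool only std::free()s its slabs: Inst destructors were never run, and the memory came back only when the owning Block went away. After this commit each Inst presumably comes from, and returns to, the global allocator on its own.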
@@ -13,7 +13,6 @@
 #include <fmt/format.h>
 #include <mcl/assert.hpp>
 
-#include "dynarmic/common/memory_pool.h"
 #include "dynarmic/frontend/A32/a32_types.h"
 #include "dynarmic/frontend/A64/a64_types.h"
 #include "dynarmic/ir/cond.h"

@@ -24,8 +23,7 @@ namespace Dynarmic::IR {
 Block::Block(const LocationDescriptor& location)
         : location{location},
           end_location{location},
-          cond{Cond::AL},
-          instruction_alloc_pool{std::make_unique<std::remove_reference_t<decltype(*instruction_alloc_pool)>>()}
+          cond{Cond::AL}
 {
 
 }

@@ -37,7 +35,7 @@ Block::Block(const LocationDescriptor& location)
 /// @param args A sequence of Value instances used as arguments for the instruction.
 /// @returns Iterator to the newly created instruction.
 Block::iterator Block::PrependNewInst(iterator insertion_point, Opcode opcode, std::initializer_list<Value> args) noexcept {
-    IR::Inst* inst = new (instruction_alloc_pool->Alloc()) IR::Inst(opcode);
+    IR::Inst* inst = new IR::Inst(opcode);
     DEBUG_ASSERT(args.size() == inst->NumArgs());
     std::for_each(args.begin(), args.end(), [&inst, index = size_t(0)](const auto& arg) mutable {
         inst->SetArg(index, arg);

@@ -83,9 +81,7 @@ static std::string TerminalToString(const Terminal& terminal_variant) noexcept {
 }
 
 std::string DumpBlock(const IR::Block& block) noexcept {
-    std::string ret;
-
-    ret += fmt::format("Block: location={}\n", block.Location());
+    std::string ret = fmt::format("Block: location={}\n", block.Location());
     ret += fmt::format("cycles={}", block.CycleCount());
     ret += fmt::format(", entry_cond={}", A64::CondToString(block.GetCondition()));
     if (block.GetCondition() != Cond::AL) {

@@ -113,6 +109,8 @@ std::string DumpBlock(const IR::Block& block) noexcept {
             return fmt::format("#{:#x}", arg.GetU32());
         case Type::U64:
             return fmt::format("#{:#x}", arg.GetU64());
+        case Type::U128:
+            return fmt::format("#<u128>");
         case Type::A32Reg:
             return A32::RegToString(arg.GetA32RegRef());
         case Type::A32ExtReg:

@@ -155,14 +153,9 @@ std::string DumpBlock(const IR::Block& block) noexcept {
                 ret += fmt::format("<type error: {} != {}>", GetNameOf(actual_type), GetNameOf(expected_type));
             }
         }
-        ret += fmt::format(" (uses: {})", inst.UseCount());
-
-        ret += '\n';
+        ret += fmt::format(" (uses: {})\n", inst.UseCount());
     }
 
     ret += "terminal = " + TerminalToString(block.GetTerminal()) + '\n';
 
     return ret;
 }
@@ -17,8 +17,6 @@
 #include "dynarmic/ir/microinstruction.h"
 #include "dynarmic/ir/terminal.h"
 #include "dynarmic/ir/value.h"
-#include "dynarmic/ir/dense_list.h"
-#include "dynarmic/common/memory_pool.h"
 
 namespace Dynarmic::IR {
 

@@ -76,7 +74,7 @@ public:
     /// @param op Opcode representing the instruction to add.
     /// @param args A sequence of Value instances used as arguments for the instruction.
     inline void AppendNewInst(const Opcode opcode, const std::initializer_list<IR::Value> args) noexcept {
-        PrependNewInst(end(), opcode, args);
+        PrependNewInst(instructions.end(), opcode, args);
     }
     iterator PrependNewInst(iterator insertion_point, Opcode op, std::initializer_list<Value> args) noexcept;
 

@@ -171,8 +169,6 @@ private:
     LocationDescriptor end_location;
     /// Conditional to pass in order to execute this block
     Cond cond;
-    /// Memory pool for instruction list
-    std::unique_ptr<Common::Pool<sizeof(Inst), 2097152UL / sizeof(Inst)>> instruction_alloc_pool;
     /// Terminal instruction of this block.
     Terminal terminal = Term::Invalid{};
     /// Number of cycles this block takes to execute if the conditional fails.
externals/dynarmic/src/dynarmic/ir/dense_list.h (vendored): 58 lines removed
@@ -1,58 +0,0 @@
-#pragma once
-
-#include <cstdint>
-#include <cstddef>
-#include <deque>
-
-namespace Dynarmic {
-template<typename T> struct dense_list {
-    using difference_type = std::ptrdiff_t;
-    using size_type = std::size_t;
-    using value_type = T;
-    using pointer = value_type*;
-    using const_pointer = const value_type*;
-    using reference = value_type&;
-    using const_reference = const value_type&;
-    using iterator = std::deque<value_type>::iterator;
-    using const_iterator = std::deque<value_type>::const_iterator;
-    using reverse_iterator = std::reverse_iterator<iterator>;
-    using const_reverse_iterator = std::reverse_iterator<const_iterator>;
-
-    inline bool empty() const noexcept { return list.empty(); }
-    inline size_type size() const noexcept { return list.size(); }
-
-    inline value_type& front() noexcept { return list.front(); }
-    inline const value_type& front() const noexcept { return list.front(); }
-
-    inline value_type& back() noexcept { return list.back(); }
-    inline const value_type& back() const noexcept { return list.back(); }
-
-    inline iterator begin() noexcept { return list.begin(); }
-    inline const_iterator begin() const noexcept { return list.begin(); }
-    inline iterator end() noexcept { return list.end(); }
-    inline const_iterator end() const noexcept { return list.end(); }
-
-    inline reverse_iterator rbegin() noexcept { return list.rbegin(); }
-    inline const_reverse_iterator rbegin() const noexcept { return list.rbegin(); }
-    inline reverse_iterator rend() noexcept { return list.rend(); }
-    inline const_reverse_iterator rend() const noexcept { return list.rend(); }
-
-    inline const_iterator cbegin() const noexcept { return list.cbegin(); }
-    inline const_iterator cend() const noexcept { return list.cend(); }
-
-    inline const_reverse_iterator crbegin() const noexcept { return list.crbegin(); }
-    inline const_reverse_iterator crend() const noexcept { return list.crend(); }
-
-    inline iterator insert_before(iterator it, value_type& value) noexcept {
-        if (it == list.begin()) {
-            list.push_front(value);
-            return list.begin();
-        }
-        auto const index = std::distance(list.begin(), it - 1);
-        list.insert(it - 1, value);
-        return list.begin() + index;
-    }
-
-    std::deque<value_type> list;
-};
-}
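dense_list, deleted above, was a thin wrapper around std::deque that exposed list-style helpers (front/back, insert_before) alongside the deque's own iterators. An illustrative, hypothetical use, assuming the header shown above:

    #include <iostream>

    int main() {
        Dynarmic::dense_list<int> xs;
        xs.list = {1, 2, 3};
        int v = 9;
        xs.insert_before(xs.begin(), v);  // takes the push_front fast path
        for (int x : xs)                  // begin()/end() forward to the deque
            std::cout << x << ' ';        // prints: 9 1 2 3
        std::cout << '\n';
    }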
@@ -2947,19 +2947,10 @@ public:
         block.SetTerminal(terminal);
     }
 
-    void SetInsertionPointBefore(IR::Inst* new_insertion_point) {
-        insertion_point = IR::Block::iterator{*new_insertion_point};
-    }
-
     void SetInsertionPointBefore(IR::Block::iterator new_insertion_point) {
         insertion_point = new_insertion_point;
     }
 
-    void SetInsertionPointAfter(IR::Inst* new_insertion_point) {
-        insertion_point = IR::Block::iterator{*new_insertion_point};
-        ++insertion_point;
-    }
-
     void SetInsertionPointAfter(IR::Block::iterator new_insertion_point) {
         insertion_point = new_insertion_point;
         ++insertion_point;
@@ -22,8 +22,7 @@ namespace Dynarmic::Optimization {
 namespace {
 
 void FlagsPass(IR::Block& block) {
-    using Iterator = std::reverse_iterator<IR::Block::iterator>;
-
+    using Iterator = IR::Block::reverse_iterator;
     struct FlagInfo {
         bool set_not_required = false;
         bool has_value_request = false;
@@ -185,10 +184,10 @@ void RegisterPass(IR::Block& block) {
     using Iterator = IR::Block::iterator;
 
     struct RegInfo {
-        IR::Value register_value;
         std::optional<Iterator> last_set_instruction;
+        IR::Value register_value;
     };
-    std::array<RegInfo, 15> reg_info;
+    alignas(64) std::array<RegInfo, 15> reg_info;
 
     const auto do_get = [](RegInfo& info, Iterator get_inst) {
         if (info.register_value.IsEmpty()) {

@@ -203,12 +202,12 @@ void RegisterPass(IR::Block& block) {
             (*info.last_set_instruction)->Invalidate();
         }
         info = {
-            .register_value = value,
             .last_set_instruction = set_inst,
+            .register_value = value,
         };
     };
 
-    enum class ExtValueType {
+    enum class ExtValueType : std::uint8_t {
         Empty,
         Single,
         Double,

@@ -216,19 +215,20 @@ void RegisterPass(IR::Block& block) {
         VectorQuad,
     };
     struct ExtRegInfo {
-        ExtValueType value_type = {};
         IR::Value register_value;
         std::optional<Iterator> last_set_instruction;
+        ExtValueType value_type = {};
     };
-    std::array<ExtRegInfo, 64> ext_reg_info;
+    // Max returned by RegNumber = 31 (but multiplied by 4 in some cases)
+    alignas(64) std::array<ExtRegInfo, 128> ext_reg_info;
 
     const auto do_ext_get = [](ExtValueType type, std::initializer_list<std::reference_wrapper<ExtRegInfo>> infos, Iterator get_inst) {
         if (!std::all_of(infos.begin(), infos.end(), [type](const auto& info) { return info.get().value_type == type; })) {
             for (auto& info : infos) {
                 info.get() = {
-                    .value_type = type,
                     .register_value = IR::Value(&*get_inst),
                     .last_set_instruction = std::nullopt,
+                    .value_type = type,
                 };
             }
             return;

@@ -244,9 +244,9 @@ void RegisterPass(IR::Block& block) {
         }
         for (auto& info : infos) {
             info.get() = {
-                .value_type = type,
                 .register_value = value,
                 .last_set_instruction = set_inst,
+                .value_type = type,
             };
         }
     };
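A note on the RegInfo/ExtRegInfo edits above: moving value_type to the end of each struct is what forces the designated initializers to be rewritten too, since C++ requires designators to appear in declaration order. A self-contained illustration with stand-in field types:

    #include <cstdint>

    enum class ExtValueType : std::uint8_t { Empty, Single };

    // Same shape as the reordered ExtRegInfo, with placeholder field types.
    struct Info {
        int register_value;
        long last_set_instruction;
        ExtValueType value_type = {};
    };

    int main() {
        // Designators must follow declaration order, so .value_type now has
        // to come last; listing it first would no longer compile.
        Info info{
            .register_value = 1,
            .last_set_instruction = 2,
            .value_type = ExtValueType::Single,
        };
        return info.value_type == ExtValueType::Single ? 0 : 1;
    }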
@@ -17,7 +17,8 @@ void A64CallbackConfigPass(IR::Block& block, const A64::UserConfig& conf) {
         return;
     }
 
-    for (auto& inst : block) {
+    for (auto iter = block.begin(); iter != block.end(); iter++) {
+        auto& inst = *iter;
         if (inst.GetOpcode() != IR::Opcode::A64DataCacheOperationRaised) {
             continue;
         }

@@ -26,7 +27,7 @@ void A64CallbackConfigPass(IR::Block& block, const A64::UserConfig& conf) {
         if (op == A64::DataCacheOperation::ZeroByVA) {
             A64::IREmitter ir{block};
             ir.current_location = A64::LocationDescriptor{IR::LocationDescriptor{inst.GetArg(0).GetU64()}};
-            ir.SetInsertionPointBefore(&inst);
+            ir.SetInsertionPointBefore(iter);
 
             size_t bytes = 4 << static_cast<size_t>(conf.dczid_el0 & 0b1111);
             IR::U64 addr{inst.GetArg(2)};
@@ -22,9 +22,9 @@ using Op = Dynarmic::IR::Opcode;
 
 namespace {
 
-// Tiny helper to avoid the need to store based off the opcode
-// bit size all over the place within folding functions.
-void ReplaceUsesWith(IR::Inst& inst, bool is_32_bit, u64 value) {
+/// Tiny helper to avoid the need to store based off the opcode
+/// bit size all over the place within folding functions.
+static void ReplaceUsesWith(IR::Inst& inst, bool is_32_bit, u64 value) {
     if (is_32_bit) {
         inst.ReplaceUsesWith(IR::Value{static_cast<u32>(value)});
     } else {

@@ -32,12 +32,12 @@ void ReplaceUsesWith(IR::Inst& inst, bool is_32_bit, u64 value) {
     }
 }
 
-IR::Value Value(bool is_32_bit, u64 value) {
+static IR::Value Value(bool is_32_bit, u64 value) {
     return is_32_bit ? IR::Value{static_cast<u32>(value)} : IR::Value{value};
 }
 
 template<typename ImmFn>
-bool FoldCommutative(IR::Inst& inst, bool is_32_bit, ImmFn imm_fn) {
+static bool FoldCommutative(IR::Inst& inst, bool is_32_bit, ImmFn imm_fn) {
     const auto lhs = inst.GetArg(0);
     const auto rhs = inst.GetArg(1);
 
@@ -75,7 +75,7 @@ bool FoldCommutative(IR::Inst& inst, bool is_32_bit, ImmFn imm_fn) {
     return true;
 }
 
-void FoldAdd(IR::Inst& inst, bool is_32_bit) {
+static void FoldAdd(IR::Inst& inst, bool is_32_bit) {
     const auto lhs = inst.GetArg(0);
     const auto rhs = inst.GetArg(1);
     const auto carry = inst.GetArg(2);

@@ -125,7 +125,7 @@ void FoldAdd(IR::Inst& inst, bool is_32_bit) {
 /// 4. x & y -> y (where x has all bits set to 1)
 /// 5. x & y -> x (where y has all bits set to 1)
 ///
-void FoldAND(IR::Inst& inst, bool is_32_bit) {
+static void FoldAND(IR::Inst& inst, bool is_32_bit) {
     if (FoldCommutative(inst, is_32_bit, [](u64 a, u64 b) { return a & b; })) {
         const auto rhs = inst.GetArg(1);
         if (rhs.IsZero()) {

@@ -140,7 +140,7 @@ void FoldAND(IR::Inst& inst, bool is_32_bit) {
 ///
 /// 1. imm -> swap(imm)
 ///
-void FoldByteReverse(IR::Inst& inst, Op op) {
+static void FoldByteReverse(IR::Inst& inst, Op op) {
     const auto operand = inst.GetArg(0);
 
     if (!operand.IsImmediate()) {

@@ -165,7 +165,7 @@ void FoldByteReverse(IR::Inst& inst, Op op) {
 /// 2. imm_x / imm_y -> result
 /// 3. x / 1 -> x
 ///
-void FoldDivide(IR::Inst& inst, bool is_32_bit, bool is_signed) {
+static void FoldDivide(IR::Inst& inst, bool is_32_bit, bool is_signed) {
     const auto rhs = inst.GetArg(1);
 
     if (rhs.IsZero()) {

@@ -193,7 +193,7 @@ void FoldDivide(IR::Inst& inst, bool is_32_bit, bool is_signed) {
 // 2. x ^ 0 -> x
 // 3. 0 ^ y -> y
 //
-void FoldEOR(IR::Inst& inst, bool is_32_bit) {
+static void FoldEOR(IR::Inst& inst, bool is_32_bit) {
     if (FoldCommutative(inst, is_32_bit, [](u64 a, u64 b) { return a ^ b; })) {
         const auto rhs = inst.GetArg(1);
         if (rhs.IsZero()) {

@@ -202,7 +202,7 @@ void FoldEOR(IR::Inst& inst, bool is_32_bit) {
     }
 }
 
-void FoldLeastSignificantByte(IR::Inst& inst) {
+static void FoldLeastSignificantByte(IR::Inst& inst) {
     if (!inst.AreAllArgsImmediates()) {
         return;
     }

@@ -211,7 +211,7 @@ void FoldLeastSignificantByte(IR::Inst& inst) {
     inst.ReplaceUsesWith(IR::Value{static_cast<u8>(operand.GetImmediateAsU64())});
 }
 
-void FoldLeastSignificantHalf(IR::Inst& inst) {
+static void FoldLeastSignificantHalf(IR::Inst& inst) {
     if (!inst.AreAllArgsImmediates()) {
         return;
     }

@@ -220,7 +220,7 @@ void FoldLeastSignificantHalf(IR::Inst& inst) {
     inst.ReplaceUsesWith(IR::Value{static_cast<u16>(operand.GetImmediateAsU64())});
 }
 
-void FoldLeastSignificantWord(IR::Inst& inst) {
+static void FoldLeastSignificantWord(IR::Inst& inst) {
     if (!inst.AreAllArgsImmediates()) {
         return;
     }

@@ -229,7 +229,7 @@ void FoldLeastSignificantWord(IR::Inst& inst) {
     inst.ReplaceUsesWith(IR::Value{static_cast<u32>(operand.GetImmediateAsU64())});
 }
 
-void FoldMostSignificantBit(IR::Inst& inst) {
+static void FoldMostSignificantBit(IR::Inst& inst) {
     if (!inst.AreAllArgsImmediates()) {
         return;
     }

@@ -238,7 +238,7 @@ void FoldMostSignificantBit(IR::Inst& inst) {
     inst.ReplaceUsesWith(IR::Value{(operand.GetImmediateAsU64() >> 31) != 0});
 }
 
-void FoldMostSignificantWord(IR::Inst& inst) {
+static void FoldMostSignificantWord(IR::Inst& inst) {
     IR::Inst* carry_inst = inst.GetAssociatedPseudoOperation(Op::GetCarryFromOp);
 
     if (!inst.AreAllArgsImmediates()) {

@@ -260,7 +260,7 @@ void FoldMostSignificantWord(IR::Inst& inst) {
 // 4. x * 1 -> x
 // 5. 1 * y -> y
 //
-void FoldMultiply(IR::Inst& inst, bool is_32_bit) {
+static void FoldMultiply(IR::Inst& inst, bool is_32_bit) {
     if (FoldCommutative(inst, is_32_bit, [](u64 a, u64 b) { return a * b; })) {
         const auto rhs = inst.GetArg(1);
         if (rhs.IsZero()) {

@@ -272,7 +272,7 @@ void FoldMultiply(IR::Inst& inst, bool is_32_bit) {
 }
 
 // Folds NOT operations if the contained value is an immediate.
-void FoldNOT(IR::Inst& inst, bool is_32_bit) {
+static void FoldNOT(IR::Inst& inst, bool is_32_bit) {
     const auto operand = inst.GetArg(0);
 
     if (!operand.IsImmediate()) {

@@ -289,7 +289,7 @@ void FoldNOT(IR::Inst& inst, bool is_32_bit) {
 // 2. x | 0 -> x
 // 3. 0 | y -> y
 //
-void FoldOR(IR::Inst& inst, bool is_32_bit) {
+static void FoldOR(IR::Inst& inst, bool is_32_bit) {
     if (FoldCommutative(inst, is_32_bit, [](u64 a, u64 b) { return a | b; })) {
         const auto rhs = inst.GetArg(1);
         if (rhs.IsZero()) {

@@ -298,7 +298,7 @@ void FoldOR(IR::Inst& inst, bool is_32_bit) {
     }
 }
 
-bool FoldShifts(IR::Inst& inst) {
+static bool FoldShifts(IR::Inst& inst) {
     IR::Inst* carry_inst = inst.GetAssociatedPseudoOperation(Op::GetCarryFromOp);
 
     // The 32-bit variants can contain 3 arguments, while the

@@ -328,7 +328,7 @@ bool FoldShifts(IR::Inst& inst) {
     return true;
 }
 
-void FoldSignExtendXToWord(IR::Inst& inst) {
+static void FoldSignExtendXToWord(IR::Inst& inst) {
     if (!inst.AreAllArgsImmediates()) {
         return;
     }

@@ -337,7 +337,7 @@ void FoldSignExtendXToWord(IR::Inst& inst) {
     inst.ReplaceUsesWith(IR::Value{static_cast<u32>(value)});
 }
 
-void FoldSignExtendXToLong(IR::Inst& inst) {
+static void FoldSignExtendXToLong(IR::Inst& inst) {
     if (!inst.AreAllArgsImmediates()) {
         return;
     }

@@ -346,7 +346,7 @@ void FoldSignExtendXToLong(IR::Inst& inst) {
     inst.ReplaceUsesWith(IR::Value{static_cast<u64>(value)});
 }
 
-void FoldSub(IR::Inst& inst, bool is_32_bit) {
+static void FoldSub(IR::Inst& inst, bool is_32_bit) {
     if (!inst.AreAllArgsImmediates() || inst.HasAssociatedPseudoOperation()) {
         return;
     }

@@ -359,7 +359,7 @@ void FoldSub(IR::Inst& inst, bool is_32_bit) {
     ReplaceUsesWith(inst, is_32_bit, result);
 }
 
-void FoldZeroExtendXToWord(IR::Inst& inst) {
+static void FoldZeroExtendXToWord(IR::Inst& inst) {
     if (!inst.AreAllArgsImmediates()) {
         return;
     }

@@ -368,7 +368,7 @@ void FoldZeroExtendXToWord(IR::Inst& inst) {
     inst.ReplaceUsesWith(IR::Value{static_cast<u32>(value)});
 }
 
-void FoldZeroExtendXToLong(IR::Inst& inst) {
+static void FoldZeroExtendXToLong(IR::Inst& inst) {
     if (!inst.AreAllArgsImmediates()) {
         return;
     }
@@ -13,11 +13,8 @@ namespace Dynarmic::Optimization {
 
 void IdentityRemovalPass(IR::Block& block) {
     std::vector<IR::Inst*> to_invalidate;
-
-    auto iter = block.begin();
-    while (iter != block.end()) {
+    for (auto iter = block.begin(); iter != block.end(); ) {
         IR::Inst& inst = *iter;
-
         const size_t num_args = inst.NumArgs();
         for (size_t i = 0; i < num_args; i++) {
             while (true) {

@@ -27,18 +24,15 @@ void IdentityRemovalPass(IR::Block& block) {
                 inst.SetArg(i, arg.GetInst()->GetArg(0));
             }
         }
 
         if (inst.GetOpcode() == IR::Opcode::Identity || inst.GetOpcode() == IR::Opcode::Void) {
-            iter = block.Instructions().erase(inst);
+            iter = block.Instructions().erase(iter);
             to_invalidate.push_back(&inst);
         } else {
             ++iter;
         }
     }
-
-    for (IR::Inst* inst : to_invalidate) {
+    for (auto* inst : to_invalidate)
         inst->Invalidate();
-    }
 }
 
 } // namespace Dynarmic::Optimization
@@ -13,7 +13,7 @@ namespace Dynarmic::Optimization {
 
 namespace {
 
-void PolyfillSHA256MessageSchedule0(IR::IREmitter& ir, IR::Inst& inst) {
+static void PolyfillSHA256MessageSchedule0(IR::IREmitter& ir, IR::Inst& inst) {
     const IR::U128 x = (IR::U128)inst.GetArg(0);
     const IR::U128 y = (IR::U128)inst.GetArg(1);
 

@@ -37,13 +37,14 @@ void PolyfillSHA256MessageSchedule0(IR::IREmitter& ir, IR::Inst& inst) {
     inst.ReplaceUsesWith(result);
 }
 
-void PolyfillSHA256MessageSchedule1(IR::IREmitter& ir, IR::Inst& inst) {
+static void PolyfillSHA256MessageSchedule1(IR::IREmitter& ir, IR::Inst& inst) {
     const IR::U128 x = (IR::U128)inst.GetArg(0);
     const IR::U128 y = (IR::U128)inst.GetArg(1);
     const IR::U128 z = (IR::U128)inst.GetArg(2);
 
     const IR::U128 T0 = ir.VectorExtract(y, z, 32);
 
+    // TODO: this can use better pipelining m8
     const IR::U128 lower_half = [&] {
         const IR::U128 T = ir.VectorRotateWholeVectorRight(z, 64);
         const IR::U128 tmp1 = ir.VectorRotateRight(32, T, 17);

@@ -73,15 +74,15 @@ void PolyfillSHA256MessageSchedule1(IR::IREmitter& ir, IR::Inst& inst) {
     inst.ReplaceUsesWith(result);
 }
 
-IR::U32 SHAchoose(IR::IREmitter& ir, IR::U32 x, IR::U32 y, IR::U32 z) {
+static IR::U32 SHAchoose(IR::IREmitter& ir, IR::U32 x, IR::U32 y, IR::U32 z) {
     return ir.Eor(ir.And(ir.Eor(y, z), x), z);
 }
 
-IR::U32 SHAmajority(IR::IREmitter& ir, IR::U32 x, IR::U32 y, IR::U32 z) {
+static IR::U32 SHAmajority(IR::IREmitter& ir, IR::U32 x, IR::U32 y, IR::U32 z) {
     return ir.Or(ir.And(x, y), ir.And(ir.Or(x, y), z));
 }
 
-IR::U32 SHAhashSIGMA0(IR::IREmitter& ir, IR::U32 x) {
+static IR::U32 SHAhashSIGMA0(IR::IREmitter& ir, IR::U32 x) {
     const IR::U32 tmp1 = ir.RotateRight(x, ir.Imm8(2));
     const IR::U32 tmp2 = ir.RotateRight(x, ir.Imm8(13));
     const IR::U32 tmp3 = ir.RotateRight(x, ir.Imm8(22));

@@ -89,7 +90,7 @@ IR::U32 SHAhashSIGMA0(IR::IREmitter& ir, IR::U32 x) {
     return ir.Eor(tmp1, ir.Eor(tmp2, tmp3));
 }
 
-IR::U32 SHAhashSIGMA1(IR::IREmitter& ir, IR::U32 x) {
+static IR::U32 SHAhashSIGMA1(IR::IREmitter& ir, IR::U32 x) {
     const IR::U32 tmp1 = ir.RotateRight(x, ir.Imm8(6));
     const IR::U32 tmp2 = ir.RotateRight(x, ir.Imm8(11));
     const IR::U32 tmp3 = ir.RotateRight(x, ir.Imm8(25));

@@ -97,7 +98,7 @@ IR::U32 SHAhashSIGMA1(IR::IREmitter& ir, IR::U32 x) {
     return ir.Eor(tmp1, ir.Eor(tmp2, tmp3));
 }
 
-void PolyfillSHA256Hash(IR::IREmitter& ir, IR::Inst& inst) {
+static void PolyfillSHA256Hash(IR::IREmitter& ir, IR::Inst& inst) {
     IR::U128 x = (IR::U128)inst.GetArg(0);
     IR::U128 y = (IR::U128)inst.GetArg(1);
     const IR::U128 w = (IR::U128)inst.GetArg(2);

@@ -139,7 +140,7 @@ void PolyfillSHA256Hash(IR::IREmitter& ir, IR::Inst& inst) {
 }
 
 template<size_t esize, bool is_signed>
-void PolyfillVectorMultiplyWiden(IR::IREmitter& ir, IR::Inst& inst) {
+static void PolyfillVectorMultiplyWiden(IR::IREmitter& ir, IR::Inst& inst) {
     IR::U128 n = (IR::U128)inst.GetArg(0);
     IR::U128 m = (IR::U128)inst.GetArg(1);
 

@@ -159,54 +160,52 @@ void PolyfillPass(IR::Block& block, const PolyfillOptions& polyfill) {
     }
 
     IR::IREmitter ir{block};
-
-    for (auto& inst : block) {
-        ir.SetInsertionPointBefore(&inst);
-
-        switch (inst.GetOpcode()) {
+    for (auto iter = block.begin(); iter != block.end(); iter++) {
+        ir.SetInsertionPointBefore(iter);
+        switch (iter->GetOpcode()) {
         case IR::Opcode::SHA256MessageSchedule0:
             if (polyfill.sha256) {
-                PolyfillSHA256MessageSchedule0(ir, inst);
+                PolyfillSHA256MessageSchedule0(ir, *iter);
             }
             break;
         case IR::Opcode::SHA256MessageSchedule1:
             if (polyfill.sha256) {
-                PolyfillSHA256MessageSchedule1(ir, inst);
+                PolyfillSHA256MessageSchedule1(ir, *iter);
             }
             break;
         case IR::Opcode::SHA256Hash:
             if (polyfill.sha256) {
-                PolyfillSHA256Hash(ir, inst);
+                PolyfillSHA256Hash(ir, *iter);
             }
             break;
        case IR::Opcode::VectorMultiplySignedWiden8:
             if (polyfill.vector_multiply_widen) {
-                PolyfillVectorMultiplyWiden<8, true>(ir, inst);
+                PolyfillVectorMultiplyWiden<8, true>(ir, *iter);
             }
             break;
         case IR::Opcode::VectorMultiplySignedWiden16:
             if (polyfill.vector_multiply_widen) {
-                PolyfillVectorMultiplyWiden<16, true>(ir, inst);
+                PolyfillVectorMultiplyWiden<16, true>(ir, *iter);
             }
             break;
         case IR::Opcode::VectorMultiplySignedWiden32:
             if (polyfill.vector_multiply_widen) {
-                PolyfillVectorMultiplyWiden<32, true>(ir, inst);
+                PolyfillVectorMultiplyWiden<32, true>(ir, *iter);
             }
             break;
         case IR::Opcode::VectorMultiplyUnsignedWiden8:
             if (polyfill.vector_multiply_widen) {
-                PolyfillVectorMultiplyWiden<8, false>(ir, inst);
+                PolyfillVectorMultiplyWiden<8, false>(ir, *iter);
             }
             break;
         case IR::Opcode::VectorMultiplyUnsignedWiden16:
             if (polyfill.vector_multiply_widen) {
-                PolyfillVectorMultiplyWiden<16, false>(ir, inst);
+                PolyfillVectorMultiplyWiden<16, false>(ir, *iter);
             }
             break;
         case IR::Opcode::VectorMultiplyUnsignedWiden32:
             if (polyfill.vector_multiply_widen) {
-                PolyfillVectorMultiplyWiden<32, false>(ir, inst);
+                PolyfillVectorMultiplyWiden<32, false>(ir, *iter);
             }
             break;
         default: