[dynarmic] reorder UserConfig fields to save 20 bytes

lizzie 2025-07-11 05:06:14 +01:00
parent 0385b2fbda
commit cab69f5ea9
6 changed files with 140 additions and 125 deletions
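The 20 bytes come from layout alone: UserConfig members are reordered so that wide members come first and narrow ones are packed together, eliminating alignment padding the compiler would otherwise insert between them. A minimal sketch of the effect, with hypothetical fields rather than dynarmic's actual ones:

#include <cstdint>

// Hypothetical fields for illustration only. On a typical 64-bit ABI the
// compiler inserts padding so each member starts at a multiple of its
// alignment.
struct Interleaved {
    std::uint8_t a;   // 1 byte, then 7 bytes of padding before the pointer
    void* p;          // 8 bytes
    std::uint8_t b;   // 1 byte, then 3 bytes of padding before the uint32
    std::uint32_t c;  // 4 bytes
};                    // sizeof == 24 on common 64-bit ABIs

// Declaring members from widest to narrowest removes the interior holes.
struct Reordered {
    void* p;          // 8 bytes
    std::uint32_t c;  // 4 bytes
    std::uint8_t a;   // 1 byte
    std::uint8_t b;   // 1 byte, then 2 bytes of tail padding
};                    // sizeof == 16 on common 64-bit ABIs

static_assert(sizeof(Reordered) <= sizeof(Interleaved));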

View file

@@ -15,7 +15,7 @@
namespace Dynarmic::A32 {
enum class ArchVersion;
enum class ArchVersion : std::uint8_t;
enum class CoprocReg;
enum class Exception;
enum class ExtReg;
@@ -27,12 +27,11 @@ enum class Reg;
* The user of this class updates `current_location` as appropriate.
*/
class IREmitter : public IR::IREmitter {
IR::U64 ImmCurrentLocationDescriptor();
public:
IREmitter(IR::Block& block, LocationDescriptor descriptor, ArchVersion arch_version)
: IR::IREmitter(block), current_location(descriptor), arch_version(arch_version) {}
LocationDescriptor current_location;
size_t ArchVersion() const;
u32 PC() const;
@@ -107,10 +106,9 @@ public:
IR::U64 CoprocGetTwoWords(size_t coproc_no, bool two, size_t opc, CoprocReg CRm);
void CoprocLoadWords(size_t coproc_no, bool two, bool long_transfer, CoprocReg CRd, const IR::U32& address, bool has_option, u8 option);
void CoprocStoreWords(size_t coproc_no, bool two, bool long_transfer, CoprocReg CRd, const IR::U32& address, bool has_option, u8 option);
private:
public:
LocationDescriptor current_location;
enum ArchVersion arch_version;
IR::U64 ImmCurrentLocationDescriptor();
};
} // namespace Dynarmic::A32
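A note on why this header changes in lockstep with arch_version.h: an opaque enum declaration fixes the underlying type (int when unspecified), and redeclaring the same enum with a different underlying type is ill-formed, so the forward declaration must repeat the new : std::uint8_t. A self-contained sketch of the rule:

#include <cstdint>

// `enum class ArchVersion;` would imply `: int` and now conflict with the
// definition, so the opaque declaration repeats the fixed underlying type.
enum class ArchVersion : std::uint8_t;

// An opaque enum declaration yields a complete type, so it can already be
// used as a data member; it occupies a single byte here.
struct Holder {
    ArchVersion version;
};

// Matching definition; enumerator names as in arch_version.h.
enum class ArchVersion : std::uint8_t {
    v3,
    v4,
    v4T,
};

static_assert(sizeof(Holder) == 1);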

View file

@@ -33,13 +33,11 @@ inline size_t ToFastLookupIndexArm(u32 instruction) {
} // namespace detail
template<typename V>
ArmDecodeTable<V> GetArmDecodeTable() {
constexpr ArmDecodeTable<V> GetArmDecodeTable() {
std::vector<ArmMatcher<V>> list = {
#define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(ArmMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)),
#include "./arm.inc"
#undef INST
};
// If a matcher has more bits in its mask it is more specific, so it should come first.
@@ -62,9 +60,10 @@ ArmDecodeTable<V> GetArmDecodeTable() {
template<typename V>
std::optional<std::reference_wrapper<const ArmMatcher<V>>> DecodeArm(u32 instruction) {
static const auto table = GetArmDecodeTable<V>();
const auto matches_instruction = [instruction](const auto& matcher) { return matcher.Matches(instruction); };
alignas(64) static const auto table = GetArmDecodeTable<V>();
const auto matches_instruction = [instruction](const auto& matcher) {
return matcher.Matches(instruction);
};
const auto& subtable = table[detail::ToFastLookupIndexArm(instruction)];
auto iter = std::find_if(subtable.begin(), subtable.end(), matches_instruction);
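The alignas(64) on the function-local static pins the lazily built table to a 64-byte boundary, the cache-line size on typical x86_64 and arm64 hosts, so the first entries of a lookup never straddle an extra line. A reduced sketch of the pattern with a stand-in matcher type, not dynarmic's real one:

#include <algorithm>
#include <array>
#include <cstdint>

// Stand-in matcher: an instruction matches when the bits selected by
// `mask` equal `expected`.
struct ToyMatcher {
    std::uint32_t mask;
    std::uint32_t expected;
    bool Matches(std::uint32_t instruction) const {
        return (instruction & mask) == expected;
    }
};

std::array<ToyMatcher, 8> BuildTable() {
    return {{{0xF0000000, 0xE0000000} /* ... */}};
}

bool CanDecode(std::uint32_t instruction) {
    // Built once on first call (C++11 magic static); alignas keeps the hot
    // table cache-line aligned.
    alignas(64) static const auto table = BuildTable();
    return std::any_of(table.begin(), table.end(), [instruction](const auto& m) {
        return m.Matches(instruction);
    });
}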

View file

@@ -33,27 +33,26 @@ inline size_t ToFastLookupIndex(u32 instruction) {
} // namespace detail
template<typename V>
DecodeTable<V> GetDecodeTable() {
constexpr DecodeTable<V> GetDecodeTable() {
std::vector<Matcher<V>> list = {
#define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(Matcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)),
#include "./a64.inc"
#undef INST
};
// If a matcher has more bits in its mask it is more specific, so it should come first.
std::stable_sort(list.begin(), list.end(), [](const auto& matcher1, const auto& matcher2) {
// If a matcher has more bits in its mask it is more specific, so it should come first.
return mcl::bit::count_ones(matcher1.GetMask()) > mcl::bit::count_ones(matcher2.GetMask());
});
// Exceptions to the above rule of thumb.
const std::set<std::string> comes_first{
"MOVI, MVNI, ORR, BIC (vector, immediate)",
"FMOV (vector, immediate)",
"Unallocated SIMD modified immediate",
};
std::stable_partition(list.begin(), list.end(), [&](const auto& matcher) {
return comes_first.count(matcher.GetName()) > 0;
return std::set<std::string>{
"MOVI, MVNI, ORR, BIC (vector, immediate)",
"FMOV (vector, immediate)",
"Unallocated SIMD modified immediate",
}.count(matcher.GetName()) > 0;
});
DecodeTable<V> table{};
@@ -75,7 +74,6 @@ std::optional<std::reference_wrapper<const Matcher<V>>> Decode(u32 instruction)
const auto matches_instruction = [instruction](const auto& matcher) {
return matcher.Matches(instruction);
};
const auto& subtable = table[detail::ToFastLookupIndex(instruction)];
auto iter = std::find_if(subtable.begin(), subtable.end(), matches_instruction);
return iter != subtable.end() ? std::optional<std::reference_wrapper<const Matcher<V>>>(*iter) : std::nullopt;
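The ordering rule itself is unchanged here, only folded into the lambda: a matcher whose mask constrains more bits is more specific and must be tried before any broader matcher it overlaps with. A sketch of that criterion, using std::popcount from <bit> as a stand-in for mcl::bit::count_ones:

#include <algorithm>
#include <bit>
#include <cstdint>
#include <vector>

struct ToyMatcher {
    std::uint32_t mask;
    std::uint32_t GetMask() const { return mask; }
};

void SortMostSpecificFirst(std::vector<ToyMatcher>& list) {
    // More mask bits means more specific, so it should come first;
    // stable_sort preserves the .inc declaration order between matchers of
    // equal specificity.
    std::stable_sort(list.begin(), list.end(), [](const auto& a, const auto& b) {
        return std::popcount(a.GetMask()) > std::popcount(b.GetMask());
    });
}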

View file

@@ -5,10 +5,12 @@
#pragma once
#include <cstdint>
namespace Dynarmic {
namespace A32 {
enum class ArchVersion {
enum class ArchVersion : std::uint8_t {
v3,
v4,
v4T,
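The size effect of the fixed underlying type can be checked directly; a scoped enum without one defaults to int:

#include <cstdint>

enum class WideVersion { v3, v4, v4T };                   // underlying type int
enum class NarrowVersion : std::uint8_t { v3, v4, v4T };  // one byte

static_assert(sizeof(WideVersion) == sizeof(int));
static_assert(sizeof(NarrowVersion) == 1);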

View file

@@ -120,14 +120,32 @@ struct UserCallbacks : public TranslateCallbacks {
};
struct UserConfig {
bool HasOptimization(OptimizationFlag f) const {
if (!unsafe_optimizations) {
f &= all_safe_optimizations;
}
return (f & optimizations) != no_optimizations;
}
UserCallbacks* callbacks;
size_t processor_id = 0;
ExclusiveMonitor* global_monitor = nullptr;
/// Select the architecture version to use.
/// There are minor behavioural differences between versions.
ArchVersion arch_version = ArchVersion::v8;
// Page Table
// The page table is used for faster memory access. If an entry in the table is nullptr,
// the JIT will fall back to calling the MemoryRead*/MemoryWrite* callbacks.
static constexpr std::size_t PAGE_BITS = 12;
static constexpr std::size_t NUM_PAGE_TABLE_ENTRIES = 1 << (32 - PAGE_BITS);
std::array<std::uint8_t*, NUM_PAGE_TABLE_ENTRIES>* page_table = nullptr;
/// Coprocessors
std::array<std::shared_ptr<Coprocessor>, 16> coprocessors{};
/// Fastmem Pointer
/// This should point to the beginning of a 4GB address space, arranged just like
/// what you wish for emulated memory to be. If the host page faults on an address, the JIT
/// will fall back to calling the MemoryRead*/MemoryWrite* callbacks.
std::optional<uintptr_t> fastmem_pointer = std::nullopt;
/// This selects other optimizations that can't otherwise be disabled by setting other
/// configuration options. This includes:
@@ -137,12 +155,29 @@ struct UserConfig {
/// This is intended to be used for debugging.
OptimizationFlag optimizations = all_safe_optimizations;
bool HasOptimization(OptimizationFlag f) const {
if (!unsafe_optimizations) {
f &= all_safe_optimizations;
}
return (f & optimizations) != no_optimizations;
}
/// Minimum size is about 8MiB. Maximum size is about 128MiB (arm64 host) or 2GiB (x64 host).
/// Maximum size is limited by the maximum length of an x86_64 / arm64 jump.
std::uint32_t code_cache_size = 128 * 1024 * 1024; // bytes
/// Processor ID
std::uint32_t processor_id = 0;
/// Masks out the first N bits in host pointers from the page table.
/// The intention behind this is to allow users of Dynarmic to pack attributes in the
/// same integer and update the pointer attribute pair atomically.
/// If the configured value is 3, all pointers will be forcefully aligned to 8 bytes.
std::int32_t page_table_pointer_mask_bits = 0;
/// Select the architecture version to use.
/// There are minor behavioural differences between versions.
ArchVersion arch_version = ArchVersion::v8;
/// Determines if we should detect misaligned memory accesses made via page_table.
/// Accesses that straddle page boundaries will fall back to the relevant
/// memory callback.
/// This value should be the required access sizes this applies to, ORed together.
/// To detect any access, use: 8 | 16 | 32 | 64.
std::uint8_t detect_misaligned_access_via_page_table = 0;
/// This enables unsafe optimizations that reduce emulation accuracy in favour of speed.
/// For safety, in order to enable unsafe optimizations you have to set BOTH this flag
@@ -150,12 +185,6 @@ struct UserConfig {
/// The preferred and tested mode for this library is with unsafe optimizations disabled.
bool unsafe_optimizations = false;
// Page Table
// The page table is used for faster memory access. If an entry in the table is nullptr,
// the JIT will fall back to calling the MemoryRead*/MemoryWrite* callbacks.
static constexpr std::size_t PAGE_BITS = 12;
static constexpr std::size_t NUM_PAGE_TABLE_ENTRIES = 1 << (32 - PAGE_BITS);
std::array<std::uint8_t*, NUM_PAGE_TABLE_ENTRIES>* page_table = nullptr;
/// Determines if the pointer in the page_table shall be offset locally or globally.
/// 'false' will access page_table[addr >> bits][addr & mask]
/// 'true' will access page_table[addr >> bits][addr]
@@ -163,26 +192,11 @@ struct UserConfig {
/// So there might be wrongly faulted pages which map to nullptr.
/// This can be avoided by carefully allocating the memory region.
bool absolute_offset_page_table = false;
/// Masks out the first N bits in host pointers from the page table.
/// The intention behind this is to allow users of Dynarmic to pack attributes in the
/// same integer and update the pointer attribute pair atomically.
/// If the configured value is 3, all pointers will be forcefully aligned to 8 bytes.
int page_table_pointer_mask_bits = 0;
/// Determines if we should detect misaligned memory accesses made via page_table.
/// Accesses that straddle page boundaries will fall back to the relevant
/// memory callback.
/// This value should be the required access sizes this applies to, ORed together.
/// To detect any access, use: 8 | 16 | 32 | 64.
std::uint8_t detect_misaligned_access_via_page_table = 0;
/// Determines if the above option only triggers when the misalignment straddles a
/// page boundary.
bool only_detect_misalignment_via_page_table_on_page_boundary = false;
// Fastmem Pointer
// This should point to the beginning of a 4GB address space, arranged just like
// what you wish for emulated memory to be. If the host page faults on an address, the JIT
// will fall back to calling the MemoryRead*/MemoryWrite* callbacks.
std::optional<uintptr_t> fastmem_pointer = std::nullopt;
/// Determines if instructions that page fault should cause recompilation of that block
/// with fastmem disabled.
/// Recompiled code will use the page_table if this is available, otherwise memory
@@ -198,9 +212,6 @@ struct UserConfig {
/// callbacks.
bool recompile_on_exclusive_fastmem_failure = true;
// Coprocessors
std::array<std::shared_ptr<Coprocessor>, 16> coprocessors{};
/// When set to true, UserCallbacks::InstructionSynchronizationBarrierRaised will be
/// called when an ISB instruction is executed.
/// When set to false, ISB will be treated as a NOP instruction.
@@ -234,10 +245,6 @@ struct UserConfig {
/// in unusual behavior.
bool always_little_endian = false;
// Minimum size is about 8MiB. Maximum size is about 128MiB (arm64 host) or 2GiB (x64 host).
// Maximum size is limited by the maximum length of an x86_64 / arm64 jump.
size_t code_cache_size = 128 * 1024 * 1024; // bytes
/// Internal use only
bool very_verbose_debugging_output = false;
};
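The two page-table addressing modes documented in this struct can be made concrete. A sketch using the PAGE_BITS constants above, with a hypothetical ReadByte helper; the real JIT emits this logic inline and falls back to the MemoryRead* callbacks when a page entry is nullptr:

#include <array>
#include <cstddef>
#include <cstdint>

constexpr std::size_t PAGE_BITS = 12;
constexpr std::size_t PAGE_MASK = (std::size_t{1} << PAGE_BITS) - 1;
constexpr std::size_t NUM_PAGE_TABLE_ENTRIES = std::size_t{1} << (32 - PAGE_BITS);

using PageTable = std::array<std::uint8_t*, NUM_PAGE_TABLE_ENTRIES>;

// Hypothetical helper, not dynarmic API: shows the two lookup shapes.
std::uint8_t ReadByte(const PageTable& table, std::uint32_t vaddr,
                      bool absolute_offset_page_table) {
    std::uint8_t* page = table[vaddr >> PAGE_BITS];
    // A real implementation checks for nullptr here and falls back to the
    // MemoryRead8 callback; omitted to keep the sketch short.
    return absolute_offset_page_table
               ? page[vaddr]               // 'true':  pointers are globally offset
               : page[vaddr & PAGE_MASK];  // 'false': pointers address one page
}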

View file

@@ -136,11 +136,30 @@ struct UserCallbacks {
};
struct UserConfig {
/// Fastmem Pointer
/// This should point to the beginning of a 2^fastmem_address_space_bits byte
/// address space, arranged just like what you wish for emulated memory to
/// be. If the host page faults on an address, the JIT will fall back to calling the
/// MemoryRead*/MemoryWrite* callbacks.
std::optional<std::uintptr_t> fastmem_pointer = std::nullopt;
UserCallbacks* callbacks;
size_t processor_id = 0;
ExclusiveMonitor* global_monitor = nullptr;
/// Pointer to where TPIDRRO_EL0 is stored. This pointer will be inserted into
/// emitted code.
const std::uint64_t* tpidrro_el0 = nullptr;
/// Pointer to where TPIDR_EL0 is stored. This pointer will be inserted into
/// emitted code.
std::uint64_t* tpidr_el0 = nullptr;
/// Pointer to the page table which we can use for direct page table access.
/// If an entry in page_table is null, the relevant memory callback will be called.
/// If page_table is nullptr, all memory accesses hit the memory callbacks.
void** page_table = nullptr;
/// This selects other optimizations that can't otherwise be disabled by setting other
/// configuration options. This includes:
/// - IR optimizations
@@ -149,12 +168,50 @@ struct UserConfig {
/// This is intended to be used for debugging.
OptimizationFlag optimizations = all_safe_optimizations;
bool HasOptimization(OptimizationFlag f) const {
if (!unsafe_optimizations) {
f &= all_safe_optimizations;
}
return (f & optimizations) != no_optimizations;
}
/// Declares how many valid address bits there are in virtual addresses.
/// Determines the size of page_table. Valid values are between 12 and 64 inclusive.
/// This is only used if page_table is not nullptr.
std::uint32_t page_table_address_space_bits = 36;
/// Masks out the first N bits in host pointers from the page table.
/// The intention behind this is to allow users of Dynarmic to pack attributes in the
/// same integer and update the pointer attribute pair atomically.
/// If the configured value is 3, all pointers will be forcefully aligned to 8 bytes.
std::int32_t page_table_pointer_mask_bits = 0;
/// Counter-timer frequency register. The value of the register is not interpreted by
/// dynarmic.
std::uint32_t cntfrq_el0 = 600000000;
/// CTR_EL0<27:24> is log2 of the cache writeback granule in words.
/// CTR_EL0<23:20> is log2 of the exclusives reservation granule in words.
/// CTR_EL0<19:16> is log2 of the smallest data/unified cacheline in words.
/// CTR_EL0<15:14> is the level 1 instruction cache policy.
/// CTR_EL0<3:0> is log2 of the smallest instruction cacheline in words.
std::uint32_t ctr_el0 = 0x8444c004;
/// DCZID_EL0<3:0> is log2 of the block size in words
/// DCZID_EL0<4> is 0 if the DC ZVA instruction is permitted.
std::uint32_t dczid_el0 = 4;
/// Declares how many valid address bits there are in virtual addresses.
/// Determines the size of fastmem arena. Valid values are between 12 and 64 inclusive.
/// This is only used if fastmem_pointer is set.
std::uint32_t fastmem_address_space_bits = 36;
// Minimum size is about 8MiB. Maximum size is about 128MiB (arm64 host) or 2GiB (x64 host).
// Maximum size is limited by the maximum length of an x86_64 / arm64 jump.
std::uint32_t code_cache_size = 128 * 1024 * 1024; // bytes
/// Determines if we should detect misaligned memory accesses made via page_table.
/// Accesses that straddle page boundaries will fall back to the relevant
/// memory callback.
/// This value should be the required access sizes this applies to, ORed together.
/// To detect any access, use: 8 | 16 | 32 | 64 | 128.
std::uint8_t detect_misaligned_access_via_page_table = 0;
/// Processor ID
std::uint8_t processor_id = 0;
/// This enables unsafe optimizations that reduce emulation accuracy in favour of speed.
/// For safety, in order to enable unsafe optimizations you have to set BOTH this flag
@@ -177,48 +234,13 @@ struct UserConfig {
/// instruction is executed.
bool hook_hint_instructions = false;
/// Counter-timer frequency register. The value of the register is not interpreted by
/// dynarmic.
std::uint32_t cntfrq_el0 = 600000000;
/// CTR_EL0<27:24> is log2 of the cache writeback granule in words.
/// CTR_EL0<23:20> is log2 of the exclusives reservation granule in words.
/// CTR_EL0<19:16> is log2 of the smallest data/unified cacheline in words.
/// CTR_EL0<15:14> is the level 1 instruction cache policy.
/// CTR_EL0<3:0> is log2 of the smallest instruction cacheline in words.
std::uint32_t ctr_el0 = 0x8444c004;
/// DCZID_EL0<3:0> is log2 of the block size in words
/// DCZID_EL0<4> is 0 if the DC ZVA instruction is permitted.
std::uint32_t dczid_el0 = 4;
/// Pointer to where TPIDRRO_EL0 is stored. This pointer will be inserted into
/// emitted code.
const std::uint64_t* tpidrro_el0 = nullptr;
/// Pointer to where TPIDR_EL0 is stored. This pointer will be inserted into
/// emitted code.
std::uint64_t* tpidr_el0 = nullptr;
/// Pointer to the page table which we can use for direct page table access.
/// If an entry in page_table is null, the relevant memory callback will be called.
/// If page_table is nullptr, all memory accesses hit the memory callbacks.
void** page_table = nullptr;
/// Declares how many valid address bits there are in virtual addresses.
/// Determines the size of page_table. Valid values are between 12 and 64 inclusive.
/// This is only used if page_table is not nullptr.
size_t page_table_address_space_bits = 36;
/// Masks out the first N bits in host pointers from the page table.
/// The intention behind this is to allow users of Dynarmic to pack attributes in the
/// same integer and update the pointer attribute pair atomically.
/// If the configured value is 3, all pointers will be forcefully aligned to 8 bytes.
int page_table_pointer_mask_bits = 0;
/// Determines what happens if the guest accesses an entry that is off the end of the
/// page table. If true, Dynarmic will silently mirror page_table's address space. If
/// false, accessing memory outside of page_table bounds will result in a call to the
/// relevant memory callback.
/// This is only used if page_table is not nullptr.
bool silently_mirror_page_table = true;
/// Determines if the pointer in the page_table shall be offset locally or globally.
/// 'false' will access page_table[addr >> bits][addr & mask]
/// 'true' will access page_table[addr >> bits][addr]
@@ -226,31 +248,17 @@ struct UserConfig {
/// So there might be wrongly faulted pages which map to nullptr.
/// This can be avoided by carefully allocating the memory region.
bool absolute_offset_page_table = false;
/// Determines if we should detect misaligned memory accesses made via page_table.
/// Accesses that straddle page boundaries will fall back to the relevant
/// memory callback.
/// This value should be the required access sizes this applies to, ORed together.
/// To detect any access, use: 8 | 16 | 32 | 64 | 128.
std::uint8_t detect_misaligned_access_via_page_table = 0;
/// Determines if the above option only triggers when the misalignment straddles a
/// page boundary.
bool only_detect_misalignment_via_page_table_on_page_boundary = false;
/// Fastmem Pointer
/// This should point to the beginning of a 2^fastmem_address_space_bits byte
/// address space, arranged just like what you wish for emulated memory to
/// be. If the host page faults on an address, the JIT will fall back to calling the
/// MemoryRead*/MemoryWrite* callbacks.
std::optional<uintptr_t> fastmem_pointer = std::nullopt;
/// Determines if instructions that page fault should cause recompilation of that block
/// with fastmem disabled.
/// Recompiled code will use the page_table if this is available, otherwise memory
/// accesses will hit the memory callbacks.
bool recompile_on_fastmem_failure = true;
/// Declares how many valid address bits there are in virtual addresses.
/// Determines the size of fastmem arena. Valid values are between 12 and 64 inclusive.
/// This is only used if fastmem_pointer is set.
size_t fastmem_address_space_bits = 36;
/// Determines what happens if the guest accesses an entry that is off the end of the
/// fastmem arena. If true, Dynarmic will silently mirror fastmem's address space. If
/// false, accessing memory outside of fastmem bounds will result in a call to the
@@ -285,12 +293,15 @@ struct UserConfig {
/// AddTicks and GetTicksRemaining are never called, and no cycle counting is done.
bool enable_cycle_counting = true;
// Minimum size is about 8MiB. Maximum size is about 128MiB (arm64 host) or 2GiB (x64 host).
// Maximum size is limited by the maximum length of an x86_64 / arm64 jump.
size_t code_cache_size = 128 * 1024 * 1024; // bytes
/// Internal use only
bool very_verbose_debugging_output = false;
inline bool HasOptimization(OptimizationFlag f) const {
if (!unsafe_optimizations) {
f &= all_safe_optimizations;
}
return (f & optimizations) != no_optimizations;
}
};
} // namespace A64
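Finally, the HasOptimization check that both configs now keep next to their flags can be exercised in isolation. The flag values below are hypothetical stand-ins for dynarmic's real OptimizationFlag set; the method body matches the diff:

#include <cstdint>

enum class OptimizationFlag : std::uint32_t {};
constexpr OptimizationFlag no_optimizations{};
constexpr OptimizationFlag all_safe_optimizations{0x0000FFFF};  // hypothetical split
constexpr OptimizationFlag some_unsafe_flag{0x00010000};        // hypothetical flag

constexpr OptimizationFlag operator&(OptimizationFlag a, OptimizationFlag b) {
    return OptimizationFlag{static_cast<std::uint32_t>(a) & static_cast<std::uint32_t>(b)};
}
constexpr OptimizationFlag& operator&=(OptimizationFlag& a, OptimizationFlag b) {
    return a = a & b;
}

struct UserConfig {
    OptimizationFlag optimizations = all_safe_optimizations;
    bool unsafe_optimizations = false;

    bool HasOptimization(OptimizationFlag f) const {
        // Unless unsafe mode is explicitly enabled, unsafe bits are masked
        // out of the query, so they can never report as active.
        if (!unsafe_optimizations) {
            f &= all_safe_optimizations;
        }
        return (f & optimizations) != no_optimizations;
    }
};

// With the defaults, a query naming only unsafe bits is denied:
//   UserConfig cfg;
//   cfg.HasOptimization(some_unsafe_flag);  // false until both opt-ins are set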