diff --git a/externals/dynarmic/src/dynarmic/frontend/A32/a32_ir_emitter.h b/externals/dynarmic/src/dynarmic/frontend/A32/a32_ir_emitter.h
index 9fde4f8775..38160f96d4 100644
--- a/externals/dynarmic/src/dynarmic/frontend/A32/a32_ir_emitter.h
+++ b/externals/dynarmic/src/dynarmic/frontend/A32/a32_ir_emitter.h
@@ -15,7 +15,7 @@
 namespace Dynarmic::A32 {
 
-enum class ArchVersion;
+enum class ArchVersion : std::uint8_t;
 enum class CoprocReg;
 enum class Exception;
 enum class ExtReg;
@@ -27,12 +27,11 @@ enum class Reg;
  * The user of this class updates `current_location` as appropriate.
 */
 class IREmitter : public IR::IREmitter {
+    IR::U64 ImmCurrentLocationDescriptor();
 public:
     IREmitter(IR::Block& block, LocationDescriptor descriptor, ArchVersion arch_version)
             : IR::IREmitter(block), current_location(descriptor), arch_version(arch_version) {}
-
-    LocationDescriptor current_location;
-
+
     size_t ArchVersion() const;
 
     u32 PC() const;
@@ -107,10 +106,9 @@ public:
     IR::U64 CoprocGetTwoWords(size_t coproc_no, bool two, size_t opc, CoprocReg CRm);
     void CoprocLoadWords(size_t coproc_no, bool two, bool long_transfer, CoprocReg CRd, const IR::U32& address, bool has_option, u8 option);
     void CoprocStoreWords(size_t coproc_no, bool two, bool long_transfer, CoprocReg CRd, const IR::U32& address, bool has_option, u8 option);
-
-private:
+public:
+    LocationDescriptor current_location;
     enum ArchVersion arch_version;
-    IR::U64 ImmCurrentLocationDescriptor();
 };
 
 }  // namespace Dynarmic::A32
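The ArchVersion hunk above and the interface/A32/arch_version.h hunk further down rely on the same C++11 rule: a scoped enum may only be forward-declared when its underlying type is fixed, and every declaration must spell out the same type. Pinning the type to std::uint8_t also lets the later hunks pack struct members more tightly. A minimal, self-contained sketch of the rule (IsThumbCapable is an invented name; the enumerator list is abbreviated):

    #include <cstdint>

    // Forward declaration: legal only because the underlying type is fixed.
    enum class ArchVersion : std::uint8_t;

    // The enum is now usable in interfaces before its enumerators are visible.
    bool IsThumbCapable(ArchVersion version);

    // The definition elsewhere must repeat the same underlying type.
    enum class ArchVersion : std::uint8_t {
        v3,
        v4,
        v4T,
        // ... later versions elided ...
    };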
diff --git a/externals/dynarmic/src/dynarmic/frontend/A32/decoder/arm.h b/externals/dynarmic/src/dynarmic/frontend/A32/decoder/arm.h
index 16ae52e13a..e4cf4a2865 100644
--- a/externals/dynarmic/src/dynarmic/frontend/A32/decoder/arm.h
+++ b/externals/dynarmic/src/dynarmic/frontend/A32/decoder/arm.h
@@ -33,13 +33,11 @@ inline size_t ToFastLookupIndexArm(u32 instruction) {
 }  // namespace detail
 
 template<typename V>
-ArmDecodeTable<V> GetArmDecodeTable() {
+constexpr ArmDecodeTable<V> GetArmDecodeTable() {
     std::vector<ArmMatcher<V>> list = {
-
 #define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(ArmMatcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)),
 #include "./arm.inc"
 #undef INST
-
     };
 
     // If a matcher has more bits in its mask it is more specific, so it should come first.
@@ -62,9 +60,10 @@ ArmDecodeTable<V> GetArmDecodeTable() {
 template<typename V>
 std::optional<std::reference_wrapper<const ArmMatcher<V>>> DecodeArm(u32 instruction) {
-    static const auto table = GetArmDecodeTable<V>();
-
-    const auto matches_instruction = [instruction](const auto& matcher) { return matcher.Matches(instruction); };
+    alignas(64) static const auto table = GetArmDecodeTable<V>();
+    const auto matches_instruction = [instruction](const auto& matcher) {
+        return matcher.Matches(instruction);
+    };
 
     const auto& subtable = table[detail::ToFastLookupIndexArm(instruction)];
     auto iter = std::find_if(subtable.begin(), subtable.end(), matches_instruction);
diff --git a/externals/dynarmic/src/dynarmic/frontend/A64/decoder/a64.h b/externals/dynarmic/src/dynarmic/frontend/A64/decoder/a64.h
index f264893502..e807490d16 100644
--- a/externals/dynarmic/src/dynarmic/frontend/A64/decoder/a64.h
+++ b/externals/dynarmic/src/dynarmic/frontend/A64/decoder/a64.h
@@ -33,27 +33,26 @@ inline size_t ToFastLookupIndex(u32 instruction) {
 }  // namespace detail
 
 template<typename V>
-DecodeTable<V> GetDecodeTable() {
+constexpr DecodeTable<V> GetDecodeTable() {
     std::vector<Matcher<V>> list = {
 #define INST(fn, name, bitstring) DYNARMIC_DECODER_GET_MATCHER(Matcher, fn, name, Decoder::detail::StringToArray<32>(bitstring)),
 #include "./a64.inc"
 #undef INST
     };
 
+    // If a matcher has more bits in its mask it is more specific, so it should come first.
     std::stable_sort(list.begin(), list.end(), [](const auto& matcher1, const auto& matcher2) {
-        // If a matcher has more bits in its mask it is more specific, so it should come first.
         return mcl::bit::count_ones(matcher1.GetMask()) > mcl::bit::count_ones(matcher2.GetMask());
     });
 
     // Exceptions to the above rule of thumb.
-    const std::set<std::string> comes_first{
-        "MOVI, MVNI, ORR, BIC (vector, immediate)",
-        "FMOV (vector, immediate)",
-        "Unallocated SIMD modified immediate",
-    };
-
     std::stable_partition(list.begin(), list.end(), [&](const auto& matcher) {
-        return comes_first.count(matcher.GetName()) > 0;
+        return std::set<std::string>{
+            "MOVI, MVNI, ORR, BIC (vector, immediate)",
+            "FMOV (vector, immediate)",
+            "Unallocated SIMD modified immediate",
+        }.count(matcher.GetName()) > 0;
     });
 
     DecodeTable<V> table{};
@@ -75,7 +74,6 @@ std::optional<std::reference_wrapper<const Matcher<V>>> Decode(u32 instruction) {
     const auto matches_instruction = [instruction](const auto& matcher) { return matcher.Matches(instruction); };
-
     const auto& subtable = table[detail::ToFastLookupIndex(instruction)];
     auto iter = std::find_if(subtable.begin(), subtable.end(), matches_instruction);
     return iter != subtable.end() ? std::optional<std::reference_wrapper<const Matcher<V>>>(*iter) : std::nullopt;
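Both decoder files keep the same two-level lookup: matchers are bucketed by a few instruction bits (ToFastLookupIndexArm/ToFastLookupIndex), and each bucket is sorted so that matchers with more mask bits, i.e. more specific bit patterns, are tried first, with a stable_partition handling the listed SIMD-immediate exceptions. The added alignas(64) is only a cache-line alignment hint and does not change decode results. A toy stand-in for the ordering invariant (ToyMatcher and SortBySpecificity are invented; dynarmic uses mcl::bit::count_ones where this sketch uses C++20 std::popcount):

    #include <algorithm>
    #include <bit>
    #include <cstdint>
    #include <vector>

    // An instruction matches when (instruction & mask) == expected.
    struct ToyMatcher {
        std::uint32_t mask;
        std::uint32_t expected;
        bool Matches(std::uint32_t inst) const { return (inst & mask) == expected; }
    };

    void SortBySpecificity(std::vector<ToyMatcher>& list) {
        // More set bits in the mask means a more specific pattern. Trying the
        // specific matchers first stops a broad pattern from shadowing a narrow
        // one that also matches, which is the invariant the std::stable_sort in
        // GetArmDecodeTable/GetDecodeTable establishes.
        std::stable_sort(list.begin(), list.end(),
                         [](const ToyMatcher& a, const ToyMatcher& b) {
                             return std::popcount(a.mask) > std::popcount(b.mask);
                         });
    }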
diff --git a/externals/dynarmic/src/dynarmic/interface/A32/arch_version.h b/externals/dynarmic/src/dynarmic/interface/A32/arch_version.h
index 240e40ee4c..209bc594f2 100644
--- a/externals/dynarmic/src/dynarmic/interface/A32/arch_version.h
+++ b/externals/dynarmic/src/dynarmic/interface/A32/arch_version.h
@@ -5,10 +5,12 @@
 
 #pragma once
 
+#include <cstdint>
+
 namespace Dynarmic {
 namespace A32 {
 
-enum class ArchVersion {
+enum class ArchVersion : std::uint8_t {
     v3,
     v4,
     v4T,
diff --git a/externals/dynarmic/src/dynarmic/interface/A32/config.h b/externals/dynarmic/src/dynarmic/interface/A32/config.h
index 360df06e2a..033967dc00 100644
--- a/externals/dynarmic/src/dynarmic/interface/A32/config.h
+++ b/externals/dynarmic/src/dynarmic/interface/A32/config.h
@@ -120,14 +120,32 @@ struct UserCallbacks : public TranslateCallbacks {
 };
 
 struct UserConfig {
+    bool HasOptimization(OptimizationFlag f) const {
+        if (!unsafe_optimizations) {
+            f &= all_safe_optimizations;
+        }
+        return (f & optimizations) != no_optimizations;
+    }
+
     UserCallbacks* callbacks;
-    size_t processor_id = 0;
     ExclusiveMonitor* global_monitor = nullptr;
 
-    /// Select the architecture version to use.
-    /// There are minor behavioural differences between versions.
-    ArchVersion arch_version = ArchVersion::v8;
+    // Page Table
+    // The page table is used for faster memory access. If an entry in the table is nullptr,
+    // the JIT will fall back to calling the MemoryRead*/MemoryWrite* callbacks.
+    static constexpr std::size_t PAGE_BITS = 12;
+    static constexpr std::size_t NUM_PAGE_TABLE_ENTRIES = 1 << (32 - PAGE_BITS);
+    std::array<std::uint8_t*, NUM_PAGE_TABLE_ENTRIES>* page_table = nullptr;
+
+    /// Coprocessors
+    std::array<std::shared_ptr<Coprocessor>, 16> coprocessors{};
+
+    /// Fastmem Pointer
+    /// This should point to the beginning of a 4GB address space which is arranged just like
+    /// what you wish for emulated memory to be. If the host page faults on an address, the JIT
+    /// will fall back to calling the MemoryRead*/MemoryWrite* callbacks.
+    std::optional<uintptr_t> fastmem_pointer = std::nullopt;
 
     /// This selects other optimizations than can't otherwise be disabled by setting other
     /// configuration options. This includes:
@@ -137,12 +155,29 @@ struct UserConfig {
     /// This is intended to be used for debugging.
     OptimizationFlag optimizations = all_safe_optimizations;
 
-    bool HasOptimization(OptimizationFlag f) const {
-        if (!unsafe_optimizations) {
-            f &= all_safe_optimizations;
-        }
-        return (f & optimizations) != no_optimizations;
-    }
+    /// Minimum size is about 8MiB. Maximum size is about 128MiB (arm64 host) or 2GiB (x64 host).
+    /// Maximum size is limited by the maximum length of a x86_64 / arm64 jump.
+    std::uint32_t code_cache_size = 128 * 1024 * 1024;  // bytes
+
+    /// Processor ID
+    std::uint32_t processor_id = 0;
+
+    /// Masks out the first N bits in host pointers from the page table.
+    /// The intention behind this is to allow users of Dynarmic to pack attributes in the
+    /// same integer and update the pointer attribute pair atomically.
+    /// If the configured value is 3, all pointers will be forcefully aligned to 8 bytes.
+    std::int32_t page_table_pointer_mask_bits = 0;
+
+    /// Select the architecture version to use.
+    /// There are minor behavioural differences between versions.
+    ArchVersion arch_version = ArchVersion::v8;
+
+    /// Determines if we should detect memory accesses via page_table that are
+    /// misaligned. Accesses that straddle page boundaries will fall back to the relevant
+    /// memory callback.
+    /// This value should be the required access sizes this applies to ORed together.
+    /// To detect any access, use: 8 | 16 | 32 | 64.
+    std::uint8_t detect_misaligned_access_via_page_table = 0;
 
     /// This enables unsafe optimizations that reduce emulation accuracy in favour of speed.
     /// For safety, in order to enable unsafe optimizations you have to set BOTH this flag
@@ -150,12 +185,6 @@ struct UserConfig {
     /// The prefered and tested mode for this library is with unsafe optimizations disabled.
     bool unsafe_optimizations = false;
 
-    // Page Table
-    // The page table is used for faster memory access. If an entry in the table is nullptr,
-    // the JIT will fallback to calling the MemoryRead*/MemoryWrite* callbacks.
-    static constexpr std::size_t PAGE_BITS = 12;
-    static constexpr std::size_t NUM_PAGE_TABLE_ENTRIES = 1 << (32 - PAGE_BITS);
-    std::array<std::uint8_t*, NUM_PAGE_TABLE_ENTRIES>* page_table = nullptr;
     /// Determines if the pointer in the page_table shall be offseted locally or globally.
     /// 'false' will access page_table[addr >> bits][addr & mask]
     /// 'true' will access page_table[addr >> bits][addr]
@@ -163,26 +192,11 @@ struct UserConfig {
     /// So there might be wrongly faulted pages which maps to nullptr.
     /// This can be avoided by carefully allocating the memory region.
     bool absolute_offset_page_table = false;
-    /// Masks out the first N bits in host pointers from the page table.
-    /// The intention behind this is to allow users of Dynarmic to pack attributes in the
-    /// same integer and update the pointer attribute pair atomically.
-    /// If the configured value is 3, all pointers will be forcefully aligned to 8 bytes.
-    int page_table_pointer_mask_bits = 0;
-    /// Determines if we should detect memory accesses via page_table that straddle are
-    /// misaligned. Accesses that straddle page boundaries will fallback to the relevant
-    /// memory callback.
-    /// This value should be the required access sizes this applies to ORed together.
-    /// To detect any access, use: 8 | 16 | 32 | 64.
-    std::uint8_t detect_misaligned_access_via_page_table = 0;
+
     /// Determines if the above option only triggers when the misalignment straddles a
     /// page boundary.
     bool only_detect_misalignment_via_page_table_on_page_boundary = false;
 
-    // Fastmem Pointer
-    // This should point to the beginning of a 4GB address space which is in arranged just like
-    // what you wish for emulated memory to be. If the host page faults on an address, the JIT
-    // will fallback to calling the MemoryRead*/MemoryWrite* callbacks.
-    std::optional<uintptr_t> fastmem_pointer = std::nullopt;
     /// Determines if instructions that pagefault should cause recompilation of that block
     /// with fastmem disabled.
     /// Recompiled code will use the page_table if this is available, otherwise memory
     /// accesses will hit the memory callbacks.
     bool recompile_on_fastmem_failure = true;
@@ -198,9 +212,6 @@ struct UserConfig {
     /// callbacks.
     bool recompile_on_exclusive_fastmem_failure = true;
 
-    // Coprocessors
-    std::array<std::shared_ptr<Coprocessor>, 16> coprocessors{};
-
     /// When set to true, UserCallbacks::InstructionSynchronizationBarrierRaised will be
     /// called when an ISB instruction is executed.
     /// When set to false, ISB will be treated as a NOP instruction.
@@ -234,10 +245,6 @@ struct UserConfig {
     /// in unusual behavior.
     bool always_little_endian = false;
 
-    // Minimum size is about 8MiB. Maximum size is about 128MiB (arm64 host) or 2GiB (x64 host).
-    // Maximum size is limited by the maximum length of a x86_64 / arm64 jump.
-    size_t code_cache_size = 128 * 1024 * 1024;  // bytes
-
     /// Internal use only
     bool very_verbose_debugging_output = false;
 };
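For orientation between the two config files: below is one way an embedder might fill the reordered A32::UserConfig. This is a minimal sketch assuming upstream dynarmic's header layout and A32::Jit constructor; MakeA32Jit is an invented helper, and the UserCallbacks implementation (memory accessors, tick accounting, exception hooks) is elided:

    #include <array>
    #include <cstdint>

    #include "dynarmic/interface/A32/a32.h"
    #include "dynarmic/interface/A32/config.h"

    Dynarmic::A32::Jit MakeA32Jit(Dynarmic::A32::UserCallbacks& callbacks) {
        Dynarmic::A32::UserConfig config{};
        config.callbacks = &callbacks;

        // Optional fast path: null entries fall back to the MemoryRead*/MemoryWrite*
        // callbacks. 2^20 entries of 4KiB pages cover the full 4GiB address space.
        static std::array<std::uint8_t*,
                          Dynarmic::A32::UserConfig::NUM_PAGE_TABLE_ENTRIES> page_table{};
        config.page_table = &page_table;

        config.arch_version = Dynarmic::A32::ArchVersion::v8;
        config.detect_misaligned_access_via_page_table = 16 | 32 | 64;  // halfword and wider

        return Dynarmic::A32::Jit{config};
    }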
diff --git a/externals/dynarmic/src/dynarmic/interface/A64/config.h b/externals/dynarmic/src/dynarmic/interface/A64/config.h
index c8ed623eb4..3563c0b2f4 100644
--- a/externals/dynarmic/src/dynarmic/interface/A64/config.h
+++ b/externals/dynarmic/src/dynarmic/interface/A64/config.h
@@ -136,11 +136,30 @@ struct UserCallbacks {
 };
 
 struct UserConfig {
+    /// Fastmem Pointer
+    /// This should point to the beginning of a 2^fastmem_address_space_bits bytes
+    /// address space which is arranged just like what you wish for emulated memory to
+    /// be. If the host page faults on an address, the JIT will fall back to calling the
+    /// MemoryRead*/MemoryWrite* callbacks.
+    std::optional<uintptr_t> fastmem_pointer = std::nullopt;
+
     UserCallbacks* callbacks;
-    size_t processor_id = 0;
     ExclusiveMonitor* global_monitor = nullptr;
 
+    /// Pointer to where TPIDRRO_EL0 is stored. This pointer will be inserted into
+    /// emitted code.
+    const std::uint64_t* tpidrro_el0 = nullptr;
+
+    /// Pointer to where TPIDR_EL0 is stored. This pointer will be inserted into
+    /// emitted code.
+    std::uint64_t* tpidr_el0 = nullptr;
+
+    /// Pointer to the page table which we can use for direct page table access.
+    /// If an entry in page_table is null, the relevant memory callback will be called.
+    /// If page_table is nullptr, all memory accesses hit the memory callbacks.
+    void** page_table = nullptr;
+
     /// This selects other optimizations than can't otherwise be disabled by setting other
     /// configuration options. This includes:
     /// - IR optimizations
@@ -149,12 +168,50 @@ struct UserConfig {
     /// This is intended to be used for debugging.
     OptimizationFlag optimizations = all_safe_optimizations;
 
-    bool HasOptimization(OptimizationFlag f) const {
-        if (!unsafe_optimizations) {
-            f &= all_safe_optimizations;
-        }
-        return (f & optimizations) != no_optimizations;
-    }
+    /// Declares how many valid address bits are there in virtual addresses.
+    /// Determines the size of page_table. Valid values are between 12 and 64 inclusive.
+    /// This is only used if page_table is not nullptr.
+    std::uint32_t page_table_address_space_bits = 36;
+
+    /// Masks out the first N bits in host pointers from the page table.
+    /// The intention behind this is to allow users of Dynarmic to pack attributes in the
+    /// same integer and update the pointer attribute pair atomically.
+    /// If the configured value is 3, all pointers will be forcefully aligned to 8 bytes.
+    std::int32_t page_table_pointer_mask_bits = 0;
+
+    /// Counter-timer frequency register. The value of the register is not interpreted by
+    /// dynarmic.
+    std::uint32_t cntfrq_el0 = 600000000;
+
+    /// CTR_EL0<27:24> is log2 of the cache writeback granule in words.
+    /// CTR_EL0<23:20> is log2 of the exclusives reservation granule in words.
+    /// CTR_EL0<19:16> is log2 of the smallest data/unified cacheline in words.
+    /// CTR_EL0<15:14> is the level 1 instruction cache policy.
+    /// CTR_EL0<3:0> is log2 of the smallest instruction cacheline in words.
+    std::uint32_t ctr_el0 = 0x8444c004;
+
+    /// DCZID_EL0<3:0> is log2 of the block size in words
+    /// DCZID_EL0<4> is 0 if the DC ZVA instruction is permitted.
+    std::uint32_t dczid_el0 = 4;
+
+    /// Declares how many valid address bits are there in virtual addresses.
+    /// Determines the size of fastmem arena. Valid values are between 12 and 64 inclusive.
+    /// This is only used if fastmem_pointer is set.
+    std::uint32_t fastmem_address_space_bits = 36;
+
+    // Minimum size is about 8MiB. Maximum size is about 128MiB (arm64 host) or 2GiB (x64 host).
+    // Maximum size is limited by the maximum length of a x86_64 / arm64 jump.
+    std::uint32_t code_cache_size = 128 * 1024 * 1024;  // bytes
+
+    /// Determines if we should detect memory accesses via page_table that are
+    /// misaligned. Accesses that straddle page boundaries will fall back to the relevant
+    /// memory callback.
+    /// This value should be the required access sizes this applies to ORed together.
+    /// To detect any access, use: 8 | 16 | 32 | 64 | 128.
+    std::uint8_t detect_misaligned_access_via_page_table = 0;
+
+    /// Processor ID
+    std::uint8_t processor_id = 0;
 
     /// This enables unsafe optimizations that reduce emulation accuracy in favour of speed.
     /// For safety, in order to enable unsafe optimizations you have to set BOTH this flag
@@ -177,48 +234,13 @@ struct UserConfig {
     /// instruction is executed.
     bool hook_hint_instructions = false;
 
-    /// Counter-timer frequency register. The value of the register is not interpreted by
-    /// dynarmic.
-    std::uint32_t cntfrq_el0 = 600000000;
-
-    /// CTR_EL0<27:24> is log2 of the cache writeback granule in words.
-    /// CTR_EL0<23:20> is log2 of the exclusives reservation granule in words.
-    /// CTR_EL0<19:16> is log2 of the smallest data/unified cacheline in words.
-    /// CTR_EL0<15:14> is the level 1 instruction cache policy.
-    /// CTR_EL0<3:0> is log2 of the smallest instruction cacheline in words.
-    std::uint32_t ctr_el0 = 0x8444c004;
-
-    /// DCZID_EL0<3:0> is log2 of the block size in words
-    /// DCZID_EL0<4> is 0 if the DC ZVA instruction is permitted.
-    std::uint32_t dczid_el0 = 4;
-
-    /// Pointer to where TPIDRRO_EL0 is stored. This pointer will be inserted into
-    /// emitted code.
-    const std::uint64_t* tpidrro_el0 = nullptr;
-
-    /// Pointer to where TPIDR_EL0 is stored. This pointer will be inserted into
-    /// emitted code.
-    std::uint64_t* tpidr_el0 = nullptr;
-
-    /// Pointer to the page table which we can use for direct page table access.
-    /// If an entry in page_table is null, the relevant memory callback will be called.
-    /// If page_table is nullptr, all memory accesses hit the memory callbacks.
-    void** page_table = nullptr;
-    /// Declares how many valid address bits are there in virtual addresses.
-    /// Determines the size of page_table. Valid values are between 12 and 64 inclusive.
-    /// This is only used if page_table is not nullptr.
-    size_t page_table_address_space_bits = 36;
-    /// Masks out the first N bits in host pointers from the page table.
-    /// The intention behind this is to allow users of Dynarmic to pack attributes in the
-    /// same integer and update the pointer attribute pair atomically.
-    /// If the configured value is 3, all pointers will be forcefully aligned to 8 bytes.
-    int page_table_pointer_mask_bits = 0;
     /// Determines what happens if the guest accesses an entry that is off the end of the
     /// page table. If true, Dynarmic will silently mirror page_table's address space. If
     /// false, accessing memory outside of page_table bounds will result in a call to the
     /// relevant memory callback.
     /// This is only used if page_table is not nullptr.
     bool silently_mirror_page_table = true;
+
     /// Determines if the pointer in the page_table shall be offseted locally or globally.
     /// 'false' will access page_table[addr >> bits][addr & mask]
     /// 'true' will access page_table[addr >> bits][addr]
@@ -226,31 +248,17 @@ struct UserConfig {
     /// So there might be wrongly faulted pages which maps to nullptr.
     /// This can be avoided by carefully allocating the memory region.
     bool absolute_offset_page_table = false;
-    /// Determines if we should detect memory accesses via page_table that straddle are
-    /// misaligned. Accesses that straddle page boundaries will fallback to the relevant
-    /// memory callback.
-    /// This value should be the required access sizes this applies to ORed together.
-    /// To detect any access, use: 8 | 16 | 32 | 64 | 128.
-    std::uint8_t detect_misaligned_access_via_page_table = 0;
+
     /// Determines if the above option only triggers when the misalignment straddles a
     /// page boundary.
     bool only_detect_misalignment_via_page_table_on_page_boundary = false;
 
-    /// Fastmem Pointer
-    /// This should point to the beginning of a 2^page_table_address_space_bits bytes
-    /// address space which is in arranged just like what you wish for emulated memory to
-    /// be. If the host page faults on an address, the JIT will fallback to calling the
-    /// MemoryRead*/MemoryWrite* callbacks.
-    std::optional<uintptr_t> fastmem_pointer = std::nullopt;
     /// Determines if instructions that pagefault should cause recompilation of that block
     /// with fastmem disabled.
     /// Recompiled code will use the page_table if this is available, otherwise memory
     /// accesses will hit the memory callbacks.
     bool recompile_on_fastmem_failure = true;
-    /// Declares how many valid address bits are there in virtual addresses.
-    /// Determines the size of fastmem arena. Valid values are between 12 and 64 inclusive.
-    /// This is only used if fastmem_pointer is set.
-    size_t fastmem_address_space_bits = 36;
+
     /// Determines what happens if the guest accesses an entry that is off the end of the
     /// fastmem arena. If true, Dynarmic will silently mirror fastmem's address space. If
     /// false, accessing memory outside of fastmem bounds will result in a call to the
@@ -285,12 +293,15 @@ struct UserConfig {
     /// AddTicks and GetTicksRemaining are never called, and no cycle counting is done.
     bool enable_cycle_counting = true;
 
-    // Minimum size is about 8MiB. Maximum size is about 128MiB (arm64 host) or 2GiB (x64 host).
-    // Maximum size is limited by the maximum length of a x86_64 / arm64 jump.
-    size_t code_cache_size = 128 * 1024 * 1024;  // bytes
-
     /// Internal use only
     bool very_verbose_debugging_output = false;
+
+    inline bool HasOptimization(OptimizationFlag f) const {
+        if (!unsafe_optimizations) {
+            f &= all_safe_optimizations;
+        }
+        return (f & optimizations) != no_optimizations;
+    }
 };
 
 }  // namespace A64