diff --git a/externals/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp b/externals/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp
index a0ee8ae9ed..2a30c52e01 100644
--- a/externals/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp
+++ b/externals/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp
@@ -415,21 +415,48 @@ void RegAlloc::ReleaseStackSpace(const size_t stack_space) noexcept {
 }
 
 HostLoc RegAlloc::SelectARegister(const boost::container::static_vector& desired_locations) const noexcept {
-    boost::container::static_vector candidates = desired_locations; //Who let someone copy an ENTIRE VECTOR here?
-
-    // Find all locations that have not been allocated..
-    const auto allocated_locs = std::partition(candidates.begin(), candidates.end(), [this](auto loc) noexcept {
-        return !this->LocInfo(loc).IsLocked();
-    });
-    candidates.erase(allocated_locs, candidates.end());
-    ASSERT_MSG(!candidates.empty(), "All candidate registers have already been allocated");
-
     // Selects the best location out of the available locations.
+    // NOTE: R8-R15 are avoided when possible: every instruction using them needs a REX prefix.
     // TODO: Actually do LRU or something. Currently we just try to pick something without a value if possible.
-    auto const it = std::find_if(candidates.begin(), candidates.end(), [this](auto const loc) noexcept {
-        return this->LocInfo(loc).IsEmpty();
-    });
-    return it != candidates.end() ? *it : candidates.front();
+    // The counters record how often a location was handed out, so this is only a "quasi"-LRU.
+    // Each group keeps its own minimum: with one shared minimum, a low-count R8-R15 register
+    // seen early would hide every later location outside R8-R15 and defeat the preference.
+    size_t min_plain_counter = size_t(-1);
+    size_t min_rex_counter = size_t(-1);
+    auto it_candidate = desired_locations.cend(); // least used unlocked location outside R8-R15
+    auto it_rex_candidate = desired_locations.cend(); // least used unlocked R8-R15 register
+    auto it_empty_candidate = desired_locations.cend(); // first unlocked location that is empty
+    for (auto it = desired_locations.cbegin(); it != desired_locations.cend(); ++it) {
+        auto const& loc_info = LocInfo(*it);
+        if (loc_info.IsLocked())
+            continue; // not suitable for allocation
+        // Only the first empty location is remembered
+        if (it_empty_candidate == desired_locations.cend() && loc_info.IsEmpty())
+            it_empty_candidate = it;
+        if (*it >= HostLoc::R8 && *it <= HostLoc::R15) {
+            if (loc_info.lru_counter < min_rex_counter) {
+                min_rex_counter = loc_info.lru_counter;
+                it_rex_candidate = it;
+            }
+        } else if (loc_info.lru_counter < min_plain_counter) {
+            min_plain_counter = loc_info.lru_counter;
+            it_candidate = it;
+            if (min_plain_counter == 0)
+                break; // early exit: this candidate wins the final resolution and no count is lower
+        }
+    }
+    // Final resolution goes as follows:
+    // 1 => Least used unlocked location outside R8-R15 (no REX prefix)
+    // 2 => First empty unlocked location
+    // 3 => Least used unlocked R8-R15 register
+    auto const it_final = it_candidate != desired_locations.cend()
+        ? it_candidate : it_empty_candidate != desired_locations.cend()
+        ? it_empty_candidate : it_rex_candidate;
+    ASSERT_MSG(it_final != desired_locations.cend(), "All candidate registers have already been allocated");
+    // lru_counter is declared mutable, so this const function can bump it without a const_cast.
+    // It is a uint8_t, so it wraps back to 0 after 255.
+    ++LocInfo(*it_final).lru_counter;
+    return *it_final;
 }
 
 void RegAlloc::DefineValueImpl(IR::Inst* def_inst, HostLoc host_loc) noexcept {
diff --git a/externals/dynarmic/src/dynarmic/backend/x64/reg_alloc.h b/externals/dynarmic/src/dynarmic/backend/x64/reg_alloc.h
index 599aab12a8..64f7c1ec40 100644
--- a/externals/dynarmic/src/dynarmic/backend/x64/reg_alloc.h
+++ b/externals/dynarmic/src/dynarmic/backend/x64/reg_alloc.h
@@ -92,8 +92,10 @@ private:
     uint8_t max_bit_width = 0; //Valid values: 1,2,4,8,16,32,128
     bool is_scratch : 1 = false; //1
     bool is_set_last_use : 1 = false; //1
-
-    alignas(16) char padding;
+    // Allocation counter read and bumped by RegAlloc::SelectARegister; mutable so that
+    // const function can update it. Wraps back to 0 after 255.
+    alignas(16) mutable uint8_t lru_counter = 0; //1
+    friend class RegAlloc;
 };
 static_assert(sizeof(HostLocInfo) == 64);
 