[dynarmic] new LRU allocation scheme

This commit is contained in:
lizzie 2025-07-19 00:49:18 +01:00 committed by crueter
parent 9dfe3cece0
commit fc6f9de3fa
2 changed files with 42 additions and 15 deletions

View file

@ -415,21 +415,48 @@ void RegAlloc::ReleaseStackSpace(const size_t stack_space) noexcept {
}
/// Selects the host location that should be used for the next allocation.
///
/// Scans `desired_locations` in order, ignoring every location that is currently
/// locked, and resolves the final choice with the following priority:
///   1. the non-REX location (anything outside R8..R15) with the lowest `lru_counter`;
///   2. the first unlocked location that holds no value;
///   3. the R8..R15 location with the lowest `lru_counter`.
/// R8..R15 are deprioritised because instructions that address them need an
/// additional REX prefix byte.
///
/// Side effect: the `lru_counter` of the chosen location is incremented, even though
/// this member function is declared const.
///
/// @param desired_locations Locations the caller is willing to accept.
/// @return The selected location. Asserts if every desired location is locked.
HostLoc RegAlloc::SelectARegister(const boost::container::static_vector<HostLoc, 28>& desired_locations) const noexcept {
    // Sentinel that is larger than any value the 8-bit lru_counter can hold.
    constexpr size_t no_counter = size_t(-1);
    const auto end = desired_locations.cend();

    auto it_candidate = end;        // best non-REX location seen so far
    auto it_rex_candidate = end;    // best R8..R15 location seen so far
    auto it_empty_candidate = end;  // first unlocked location without a value
    // The minimum is tracked separately for both register classes; sharing a single
    // minimum would let a rarely used R8..R15 register hide every non-REX register
    // that comes after it in the list.
    size_t min_lru_counter = no_counter;
    size_t min_rex_lru_counter = no_counter;

    for (auto it = desired_locations.cbegin(); it != end; ++it) {
        auto const& loc_info = LocInfo(*it);
        if (loc_info.IsLocked())
            continue;  // not suitable for allocation
        // Only remember the first empty location.
        if (it_empty_candidate == end && loc_info.IsEmpty())
            it_empty_candidate = it;
        // Abstain from using the upper registers unless absolutely necessary.
        const bool needs_rex = *it >= HostLoc::R8 && *it <= HostLoc::R15;
        if (needs_rex) {
            if (loc_info.lru_counter < min_rex_lru_counter) {
                min_rex_lru_counter = loc_info.lru_counter;
                it_rex_candidate = it;
            }
        } else if (loc_info.lru_counter < min_lru_counter) {
            // "Quasi"-LRU: prefer the location that has been handed out the least.
            min_lru_counter = loc_info.lru_counter;
            it_candidate = it;
            // Nothing can beat a non-REX location with a zero counter, and the
            // lower-priority fallbacks are irrelevant once one exists.
            if (min_lru_counter == 0)
                break;
        }
    }

    // Final resolution: non-REX candidate, then empty candidate, then REX candidate.
    auto it_final = it_candidate;
    if (it_final == end)
        it_final = it_empty_candidate;
    if (it_final == end)
        it_final = it_rex_candidate;
    ASSERT_MSG(it_final != end, "All candidate registers have already been allocated");

    // The counter is bookkeeping only, so it is bumped through a const_cast to keep
    // the const signature callers rely on. It is 8 bits wide and wraps at 256.
    const_cast<RegAlloc*>(this)->LocInfo(*it_final).lru_counter++;
    return *it_final;
}
void RegAlloc::DefineValueImpl(IR::Inst* def_inst, HostLoc host_loc) noexcept {

View file

@ -92,8 +92,8 @@ private:
uint8_t max_bit_width = 0; //Valid values: 1,2,4,8,16,32,128
bool is_scratch : 1 = false; //1
bool is_set_last_use : 1 = false; //1
alignas(16) char padding;
alignas(16) uint8_t lru_counter = 0; //1
friend class RegAlloc;
};
static_assert(sizeof(HostLocInfo) == 64);