From 9917479e6066c3157ce04e7178caff32e6ebfe9c Mon Sep 17 00:00:00 2001 From: lizzie Date: Tue, 22 Jul 2025 05:03:15 +0100 Subject: [PATCH] [dynarmic] revert memory imm precalc --- .../impl/load_store_single_structure.cpp | 43 ++++++++----------- 1 file changed, 19 insertions(+), 24 deletions(-) diff --git a/externals/dynarmic/src/dynarmic/frontend/A64/translate/impl/load_store_single_structure.cpp b/externals/dynarmic/src/dynarmic/frontend/A64/translate/impl/load_store_single_structure.cpp index 9615abb153..b4bc842942 100644 --- a/externals/dynarmic/src/dynarmic/frontend/A64/translate/impl/load_store_single_structure.cpp +++ b/externals/dynarmic/src/dynarmic/frontend/A64/translate/impl/load_store_single_structure.cpp @@ -58,51 +58,46 @@ static bool SharedDecodeAndOperation(TranslatorVisitor& v, bool wback, IR::MemOp address = v.X(64, Rn); } + IR::U64 offs = v.ir.Imm64(0); if (replicate) { - // CPU likes when we read first and then we do operations; Sure, OOO, but might as well - IR::UAnyU128 p_elements[4] = {}; //max upper bound=4 elements - for (size_t s = 0; s < selem; ++s) { - p_elements[s] = v.Mem(v.ir.Add(address, v.ir.Imm64(ebytes * s)), ebytes, IR::AccType::VEC); - } - // schedule ops after - for (size_t s = 0; s < selem; ++s) { + for (size_t s = 0; s < selem; s++) { const Vec tt = static_cast((VecNumber(Vt) + s) % 32); - const IR::U128 broadcasted_element = v.ir.VectorBroadcast(esize, p_elements[s]); + const IR::UAnyU128 element = v.Mem(v.ir.Add(address, offs), ebytes, IR::AccType::VEC); + const IR::U128 broadcasted_element = v.ir.VectorBroadcast(esize, element); + v.V(datasize, tt, broadcasted_element); + + offs = v.ir.Add(offs, v.ir.Imm64(ebytes)); } } else { - if (memop == IR::MemOp::LOAD) { - IR::UAny p_elements[4] = {}; //max upper bound=4 elements - for (size_t s = 0; s < selem; ++s) { - p_elements[s] = v.Mem(v.ir.Add(address, v.ir.Imm64(ebytes * s)), ebytes, IR::AccType::VEC); - } - for (size_t s = 0; s < selem; ++s) { - const Vec tt = static_cast((VecNumber(Vt) + s) % 32); - const IR::U128 rval = v.V(128, tt); - const IR::U128 vec = v.ir.VectorSetElement(esize, rval, index, p_elements[s]); + for (size_t s = 0; s < selem; s++) { + const Vec tt = static_cast((VecNumber(Vt) + s) % 32); + const IR::U128 rval = v.V(128, tt); + + if (memop == IR::MemOp::LOAD) { + const IR::UAny elem = v.Mem(v.ir.Add(address, offs), ebytes, IR::AccType::VEC); + const IR::U128 vec = v.ir.VectorSetElement(esize, rval, index, elem); v.V(128, tt, vec); - } - } else { - for (size_t s = 0; s < selem; ++s) { - const Vec tt = static_cast((VecNumber(Vt) + s) % 32); - const IR::U128 rval = v.V(128, tt); + } else { const IR::UAny elem = v.ir.VectorGetElement(esize, rval, index); - v.Mem(v.ir.Add(address, v.ir.Imm64(ebytes * s)), ebytes, IR::AccType::VEC, elem); + v.Mem(v.ir.Add(address, offs), ebytes, IR::AccType::VEC, elem); } + offs = v.ir.Add(offs, v.ir.Imm64(ebytes)); } } - IR::U64 offs = v.ir.Imm64(ebytes * selem); if (wback) { if (*Rm != Reg::SP) { offs = v.X(64, *Rm); } + if (Rn == Reg::SP) { v.SP(64, v.ir.Add(address, offs)); } else { v.X(64, Rn, v.ir.Add(address, offs)); } } + return true; }