Mirror of https://git.eden-emu.dev/eden-emu/eden.git (synced 2025-07-20 03:35:46 +00:00)
[dynarmic] delay ops, load first (attempt#1)

commit f2e352822e
parent f414ebdf34
4 changed files with 49 additions and 44 deletions
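The idea behind "delay ops, load first": instead of interleaving each memory read with the computation that consumes it, the translator now issues all of the reads up front into a small fixed-size buffer and schedules the dependent operations afterwards. A minimal standalone sketch of that restructuring (illustration only; emit_load and emit_use are hypothetical stand-ins, not dynarmic APIs):

// Illustration of the "load first, then operate" restructuring applied below.
// emit_load/emit_use are hypothetical stand-ins, not dynarmic APIs.
#include <cstddef>
#include <cstdint>
#include <iostream>

constexpr std::size_t kMaxElems = 4;  // same upper bound the patch assumes (selem <= 4)

std::uint64_t emit_load(std::uint64_t address) {
    std::cout << "load @" << address << '\n';
    return address;  // placeholder "value"
}

void emit_use(std::uint64_t value) {
    std::cout << "use  " << value << '\n';
}

// Old shape: each load is immediately followed by the work that uses it.
void interleaved(std::uint64_t address, std::size_t selem, std::size_t ebytes) {
    for (std::size_t s = 0; s < selem; ++s) {
        emit_use(emit_load(address + ebytes * s));
    }
}

// New shape: issue every load first, then run the dependent operations.
void load_first(std::uint64_t address, std::size_t selem, std::size_t ebytes) {
    std::uint64_t loaded[kMaxElems] = {};
    for (std::size_t s = 0; s < selem; ++s) {
        loaded[s] = emit_load(address + ebytes * s);
    }
    for (std::size_t s = 0; s < selem; ++s) {
        emit_use(loaded[s]);
    }
}

int main() {
    interleaved(0x1000, 3, 4);
    load_first(0x1000, 3, 4);
    return 0;
}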
@@ -364,10 +364,10 @@ void BlockOfCode::GenRunCode(std::function<void(BlockOfCode&)> rcp) {
     return_from_run_code[0] = getCurr<const void*>();

     cmp(dword[r15 + jsi.offsetof_halt_reason], 0);
-    jne(return_to_caller);
+    jne(return_to_caller, T_NEAR);
     if (cb.enable_cycle_counting) {
         cmp(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], 0);
-        jng(return_to_caller);
+        jng(return_to_caller, T_NEAR);
     }
     cb.LookupBlock->EmitCall(*this);
     jmp(ABI_RETURN);
@@ -376,10 +376,10 @@ void BlockOfCode::GenRunCode(std::function<void(BlockOfCode&)> rcp) {
     return_from_run_code[MXCSR_ALREADY_EXITED] = getCurr<const void*>();

     cmp(dword[r15 + jsi.offsetof_halt_reason], 0);
-    jne(return_to_caller_mxcsr_already_exited);
+    jne(return_to_caller_mxcsr_already_exited, T_NEAR);
     if (cb.enable_cycle_counting) {
         cmp(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], 0);
-        jng(return_to_caller_mxcsr_already_exited);
+        jng(return_to_caller_mxcsr_already_exited, T_NEAR);
     }
     SwitchMxcsrOnEntry();
     cb.LookupBlock->EmitCall(*this);
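The jne/jng calls above are Xbyak emitter calls. Passing T_NEAR forces the rel32 form of the conditional jump, so the branch target may sit further away than the +/-127 bytes a short rel8 jump can reach; presumably the return_to_caller labels are that far from these checks. A standalone sketch of the parameter, assuming plain Xbyak (JumpDemo is illustrative, not dynarmic code):

// Standalone sketch (assumed plain Xbyak usage, not the dynarmic emitter):
// T_NEAR forces the 6-byte `jcc rel32` encoding, so the target may be further
// away than the +/-127 bytes a short `jcc rel8` can reach.
#include <xbyak/xbyak.h>

struct JumpDemo : Xbyak::CodeGenerator {
    JumpDemo() {
        Xbyak::Label far_target;
        cmp(eax, 0);
        jne(far_target, T_NEAR);   // always encoded as 0F 85 rel32
        for (int i = 0; i < 200; ++i) {
            nop();                 // more than 127 bytes of padding before the target,
        }                          // so a rel8 displacement could not reach it
        L(far_target);
        ret();
    }
};

int main() {
    JumpDemo demo;                 // generates the code into an internal buffer
    return demo.getSize() > 0 ? 0 : 1;
}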
@@ -403,8 +403,7 @@ void BlockOfCode::GenRunCode(std::function<void(BlockOfCode&)> rcp) {
     }

     xor_(eax, eax);
-    lock();
-    xchg(dword[r15 + jsi.offsetof_halt_reason], eax);
+    /* implicit LOCK */ xchg(dword[r15 + jsi.offsetof_halt_reason], eax);

     ABI_PopCalleeSaveRegistersAndAdjustStack(*this, sizeof(StackLayout));
     ret();
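On the xchg change: x86 `xchg` with a memory operand is always executed atomically, so the explicit LOCK prefix was redundant; the new line keeps the same semantics and saves the one-byte prefix. A rough illustration with std::atomic (a hypothetical flag word, not the dynarmic halt_reason field):

// Rough illustration (not the dynarmic code): clearing a 32-bit flag word
// atomically. On x86-64 this exchange is typically lowered to
//     xor eax, eax
//     xchg dword ptr [flag], eax
// with no LOCK prefix, because xchg with a memory operand is implicitly locked.
#include <atomic>
#include <cstdint>

std::atomic<std::uint32_t> flag{0};

std::uint32_t clear_flag() {
    return flag.exchange(0, std::memory_order_seq_cst);
}

int main() {
    flag.store(42);
    return clear_flag() == 42 ? 0 : 1;
}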
@@ -58,46 +58,51 @@ static bool SharedDecodeAndOperation(TranslatorVisitor& v, bool wback, IR::MemOp
         address = v.X(64, Rn);
     }

-    IR::U64 offs = v.ir.Imm64(0);
     if (replicate) {
-        for (size_t s = 0; s < selem; s++) {
+        // CPU likes when we read first and then we do operations; Sure, OOO, but might as well
+        IR::UAnyU128 p_elements[4] = {}; //max upper bound=4 elements
+        for (size_t s = 0; s < selem; ++s) {
+            p_elements[s] = v.Mem(v.ir.Add(address, v.ir.Imm64(ebytes * s)), ebytes, IR::AccType::VEC);
+        }
+        // schedule ops after
+        for (size_t s = 0; s < selem; ++s) {
             const Vec tt = static_cast<Vec>((VecNumber(Vt) + s) % 32);
-            const IR::UAnyU128 element = v.Mem(v.ir.Add(address, offs), ebytes, IR::AccType::VEC);
-            const IR::U128 broadcasted_element = v.ir.VectorBroadcast(esize, element);
-
+            const IR::U128 broadcasted_element = v.ir.VectorBroadcast(esize, p_elements[s]);
             v.V(datasize, tt, broadcasted_element);
-
-            offs = v.ir.Add(offs, v.ir.Imm64(ebytes));
         }
     } else {
-        for (size_t s = 0; s < selem; s++) {
-            const Vec tt = static_cast<Vec>((VecNumber(Vt) + s) % 32);
-            const IR::U128 rval = v.V(128, tt);
-
-            if (memop == IR::MemOp::LOAD) {
-                const IR::UAny elem = v.Mem(v.ir.Add(address, offs), ebytes, IR::AccType::VEC);
-                const IR::U128 vec = v.ir.VectorSetElement(esize, rval, index, elem);
-                v.V(128, tt, vec);
-            } else {
-                const IR::UAny elem = v.ir.VectorGetElement(esize, rval, index);
-                v.Mem(v.ir.Add(address, offs), ebytes, IR::AccType::VEC, elem);
+        if (memop == IR::MemOp::LOAD) {
+            IR::UAny p_elements[4] = {}; //max upper bound=4 elements
+            for (size_t s = 0; s < selem; ++s) {
+                p_elements[s] = v.Mem(v.ir.Add(address, v.ir.Imm64(ebytes * s)), ebytes, IR::AccType::VEC);
+            }
+            for (size_t s = 0; s < selem; ++s) {
+                const Vec tt = static_cast<Vec>((VecNumber(Vt) + s) % 32);
+                const IR::U128 rval = v.V(128, tt);
+                const IR::U128 vec = v.ir.VectorSetElement(esize, rval, index, p_elements[s]);
+                v.V(128, tt, vec);
+            }
+        } else {
+            for (size_t s = 0; s < selem; ++s) {
+                const Vec tt = static_cast<Vec>((VecNumber(Vt) + s) % 32);
+                const IR::U128 rval = v.V(128, tt);
+                const IR::UAny elem = v.ir.VectorGetElement(esize, rval, index);
+                v.Mem(v.ir.Add(address, v.ir.Imm64(ebytes * s)), ebytes, IR::AccType::VEC, elem);
             }
-            offs = v.ir.Add(offs, v.ir.Imm64(ebytes));
         }
     }

+    IR::U64 offs = v.ir.Imm64(ebytes * selem);
     if (wback) {
         if (*Rm != Reg::SP) {
             offs = v.X(64, *Rm);
         }

         if (Rn == Reg::SP) {
             v.SP(64, v.ir.Add(address, offs));
         } else {
             v.X(64, Rn, v.ir.Add(address, offs));
         }
     }

     return true;
 }
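On the writeback change at the end of that hunk: each element advances the address by ebytes, and the loops no longer carry an incremental offs, so the post-index writeback amount collapses to the closed form ebytes * selem. A trivial check of that equivalence (illustrative values only, not dynarmic code):

// Illustrative check: the per-iteration additions the old loops accumulated
// equal the single value the new code materialises.
#include <cassert>
#include <cstdint>

int main() {
    const std::uint64_t ebytes = 4;  // example element size in bytes
    const std::uint64_t selem = 3;   // example element count
    std::uint64_t offs = 0;
    for (std::uint64_t s = 0; s < selem; ++s) {
        offs += ebytes;              // what the removed v.ir.Add calls did
    }
    assert(offs == ebytes * selem);  // what IR::U64 offs = v.ir.Imm64(ebytes * selem) encodes
    return 0;
}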
@@ -25,16 +25,18 @@ bool AbsoluteDifferenceLong(TranslatorVisitor& v, bool Q, Imm<2> size, Vec Vm, V
     const size_t esize = 8 << size.ZeroExtend();
     const size_t datasize = 64;
-
-    const IR::U128 operand1 = v.ir.VectorZeroExtend(esize, v.Vpart(datasize, Vn, Q));
-    const IR::U128 operand2 = v.ir.VectorZeroExtend(esize, v.Vpart(datasize, Vm, Q));
-    IR::U128 result = sign == SignednessSTD::Signed ? v.ir.VectorSignedAbsoluteDifference(esize, operand1, operand2)
-                                                    : v.ir.VectorUnsignedAbsoluteDifference(esize, operand1, operand2);
-
+    // Loads first, then operations
+    auto const s_operand1 = v.Vpart(datasize, Vn, Q);
+    auto const s_operand2 = v.Vpart(datasize, Vm, Q);
+    const IR::U128 operand1 = v.ir.VectorZeroExtend(esize, s_operand1);
+    const IR::U128 operand2 = v.ir.VectorZeroExtend(esize, s_operand2);
+    IR::U128 result = sign == SignednessSTD::Signed
+                          ? v.ir.VectorSignedAbsoluteDifference(esize, operand1, operand2)
+                          : v.ir.VectorUnsignedAbsoluteDifference(esize, operand1, operand2);
     if (behavior == AbsoluteDifferenceBehavior::Accumulate) {
         const IR::U128 data = v.V(2 * datasize, Vd);
         result = v.ir.VectorAdd(2 * esize, result, data);
     }

     v.V(2 * datasize, Vd, result);
     return true;
 }
@@ -134,10 +134,8 @@ bool FPCompareRegister(TranslatorVisitor& v, bool Q, bool sz, Vec Vm, Vec Vn, Ve
     if (sz && !Q) {
         return v.ReservedValue();
     }
-
     const size_t esize = sz ? 64 : 32;
     const size_t datasize = Q ? 128 : 64;
-
     const IR::U128 operand1 = v.V(datasize, Vn);
     const IR::U128 operand2 = v.V(datasize, Vm);
     const IR::U128 result = [&] {
@@ -146,21 +144,22 @@
             return v.ir.FPVectorEqual(esize, operand1, operand2);
         case ComparisonTypeSTS::GE:
             return v.ir.FPVectorGreaterEqual(esize, operand1, operand2);
-        case ComparisonTypeSTS::AbsoluteGE:
-            return v.ir.FPVectorGreaterEqual(esize,
-                                             v.ir.FPVectorAbs(esize, operand1),
-                                             v.ir.FPVectorAbs(esize, operand2));
+        case ComparisonTypeSTS::AbsoluteGE: {
+            auto const tmp1 = v.ir.FPVectorAbs(esize, operand1);
+            auto const tmp2 = v.ir.FPVectorAbs(esize, operand2);
+            return v.ir.FPVectorGreaterEqual(esize, tmp1, tmp2);
+        }
         case ComparisonTypeSTS::GT:
             return v.ir.FPVectorGreater(esize, operand1, operand2);
-        case ComparisonTypeSTS::AbsoluteGT:
-            return v.ir.FPVectorGreater(esize,
-                                        v.ir.FPVectorAbs(esize, operand1),
-                                        v.ir.FPVectorAbs(esize, operand2));
+        case ComparisonTypeSTS::AbsoluteGT: {
+            auto const tmp1 = v.ir.FPVectorAbs(esize, operand1);
+            auto const tmp2 = v.ir.FPVectorAbs(esize, operand2);
+            return v.ir.FPVectorGreater(esize, tmp1, tmp2);
+        }
         }
-
         UNREACHABLE();
     }();

     v.V(datasize, Vd, result);
     return true;
 }
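Presumably the point of the AbsoluteGE/AbsoluteGT rewrite: when both FPVectorAbs calls sat inside the argument list, C++ left their evaluation order (and hence the order the IR instructions are emitted) unspecified; naming tmp1 and tmp2 sequences them explicitly. A small standalone demonstration of that language rule (trace and combine are illustrative helpers, not dynarmic APIs):

// Illustration only: function-argument evaluation order is unspecified in C++,
// so the two trace() calls below may run in either order when passed directly;
// binding them to named temporaries first fixes the order.
#include <iostream>

static int trace(int id) {
    std::cout << "evaluated " << id << '\n';
    return id;
}

static int combine(int a, int b) { return a + b; }

int main() {
    // May print "evaluated 1" and "evaluated 2" in either order.
    combine(trace(1), trace(2));

    // Sequenced: 1 is always evaluated before 2.
    const int t1 = trace(1);
    const int t2 = trace(2);
    combine(t1, t2);
    return 0;
}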