[dynarmic] reorg asms

This commit is contained in:
lizzie 2025-07-11 19:08:20 +01:00
parent cab69f5ea9
commit 722c0d93a4
4 changed files with 18 additions and 17 deletions

View file

@ -185,14 +185,16 @@ void A64EmitX64::ClearFastDispatchTable() {
}
void A64EmitX64::GenTerminalHandlers() {
// PC ends up in rbp, location_descriptor ends up in rbx
// PC ends up in rbp, location_descriptor ends up in rbx; clobbers rcx
const auto calculate_location_descriptor = [this] {
// This calculation has to match up with A64::LocationDescriptor::UniqueHash
// TODO: Optimization is available here based on known state of fpcr.
code.mov(rbp, qword[r15 + offsetof(A64JitState, pc)]);
code.mov(ebx, dword[r15 + offsetof(A64JitState, fpcr)]);
// RBP = PC, RCX = PC & PcMask
code.mov(rcx, A64::LocationDescriptor::pc_mask);
code.and_(rcx, rbp);
code.mov(ebx, dword[r15 + offsetof(A64JitState, fpcr)]);
// RBX = ((FPCR & FpcrMask) << FpcrShift) | RCX
code.and_(ebx, A64::LocationDescriptor::fpcr_mask);
code.shl(rbx, A64::LocationDescriptor::fpcr_shift);
code.or_(rbx, rcx);
@ -203,8 +205,8 @@ void A64EmitX64::GenTerminalHandlers() {
code.align();
terminal_handler_pop_rsb_hint = code.getCurr<const void*>();
code.mov(eax, dword[r15 + offsetof(A64JitState, rsb_ptr)]); // Preload (avoid cache miss penalty)
calculate_location_descriptor();
code.mov(eax, dword[r15 + offsetof(A64JitState, rsb_ptr)]);
code.dec(eax);
code.and_(eax, u32(A64JitState::RSBPtrMask));
code.mov(dword[r15 + offsetof(A64JitState, rsb_ptr)], eax);
@ -428,7 +430,6 @@ void A64EmitX64::EmitA64SetD(A64EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const A64::Vec vec = inst->GetArg(0).GetA64VecRef();
const auto addr = xword[r15 + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec)];
const Xbyak::Xmm to_store = ctx.reg_alloc.UseScratchXmm(args[1]);
code.movq(to_store, to_store); // TODO: Remove when able
code.movaps(addr, to_store);

View file

@ -106,21 +106,23 @@ void EmitX64::EmitMostSignificantBit(EmitContext& ctx, IR::Inst* inst) {
/// Emits x64 code for the IsZero32 IR instruction: the defined result register
/// holds 1 when the 32-bit operand is zero and 0 otherwise.
void EmitX64::EmitIsZero32(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr();
    // Test only the 32-bit view of the operand, as the earlier revision of this
    // emitter did via cvt32(). A 64-bit test would also look at the upper half
    // of the host register.
    // NOTE(review): presumably the upper half is not guaranteed to be zero for
    // a 32-bit IR value - confirm against the register allocator's contract.
    const Xbyak::Reg32 source = ctx.reg_alloc.UseGpr(args[0]).cvt32();
    // TODO: Flag optimization
    // Zero the result *before* the test: xor rewrites the flags sete consumes.
    code.xor_(result, result);
    code.test(source, source);
    // sete writes only the low byte; the rest of the register is already zero
    // from the xor above, so no movzx is required afterwards.
    code.sete(result.cvt8());
    ctx.reg_alloc.DefineValue(inst, result);
}
/// Emits x64 code for the IsZero64 IR instruction: the defined result register
/// holds 1 when the 64-bit operand is zero and 0 otherwise.
void EmitX64::EmitIsZero64(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    // The result lives in its own scratch register so the operand register is
    // only read, never overwritten.
    const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr();
    const Xbyak::Reg64 source = ctx.reg_alloc.UseGpr(args[0]);
    // TODO: Flag optimization
    // Zero the result *before* the test: xor rewrites the flags sete consumes.
    code.xor_(result, result);
    code.test(source, source);
    // sete writes only the low byte; the rest of the register is already zero
    // from the xor above, so no movzx is required afterwards.
    code.sete(result.cvt8());
    ctx.reg_alloc.DefineValue(inst, result);
}

View file

@ -762,9 +762,9 @@ static void EmitFPMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
code.L(op1_done);
FCODE(ucomis)(operand2, operand2);
code.jnp(op2_done);
code.jnp(op2_done, code.T_NEAR);
code.ptest(operand2, xmm0);
code.jnz(op2_done);
code.jnz(op2_done, code.T_NEAR);
code.vorps(result, operand2, xmm0);
if constexpr (negate_product) {
code.xorps(result, code.Const(xword, FP::FPInfo<FPT>::sign_mask));

View file

@ -351,9 +351,8 @@ u32 ArmDynarmic32::GetSvcNumber() const {
}
void ArmDynarmic32::GetSvcArguments(std::span<uint64_t, 8> args) const {
Dynarmic::A32::Jit& j = *m_jit;
auto& gpr = j.Regs();
Dynarmic::A32::Jit const& j = *m_jit;
auto const& gpr = j.Regs();
for (size_t i = 0; i < 8; i++) {
args[i] = gpr[i];
}
@ -362,9 +361,8 @@ void ArmDynarmic32::GetSvcArguments(std::span<uint64_t, 8> args) const {
/// Copies the eight supplied SVC argument values into the first eight entries
/// of the JIT's register array.
///
/// @param args Eight 64-bit argument values; each is narrowed to 32 bits
///             before being stored, so the upper halves are discarded.
void ArmDynarmic32::SetSvcArguments(std::span<const uint64_t, 8> args) {
    Dynarmic::A32::Jit& j = *m_jit;
    auto& gpr = j.Regs();
    for (size_t i = 0; i < 8; i++) {
        // Single store per register; the explicit u32 conversion documents the
        // intentional truncation of the 64-bit argument.
        gpr[i] = u32(args[i]);
    }
}