From 722c0d93a4b047dd3569ab9d506d5ae026f5101f Mon Sep 17 00:00:00 2001 From: lizzie Date: Fri, 11 Jul 2025 19:08:20 +0100 Subject: [PATCH] [dynarmic] reorg asms --- .../src/dynarmic/backend/x64/a64_emit_x64.cpp | 9 +++++---- .../backend/x64/emit_x64_data_processing.cpp | 14 ++++++++------ .../backend/x64/emit_x64_floating_point.cpp | 4 ++-- src/core/arm/dynarmic/arm_dynarmic_32.cpp | 8 +++----- 4 files changed, 18 insertions(+), 17 deletions(-) diff --git a/externals/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.cpp b/externals/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.cpp index 4d7bb0d7b1..1f2fc8c535 100644 --- a/externals/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.cpp +++ b/externals/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.cpp @@ -185,14 +185,16 @@ void A64EmitX64::ClearFastDispatchTable() { } void A64EmitX64::GenTerminalHandlers() { - // PC ends up in rbp, location_descriptor ends up in rbx + // PC ends up in rbp, location_descriptor ends up in rbx; clobbers rcx const auto calculate_location_descriptor = [this] { // This calculation has to match up with A64::LocationDescriptor::UniqueHash // TODO: Optimization is available here based on known state of fpcr. code.mov(rbp, qword[r15 + offsetof(A64JitState, pc)]); + code.mov(ebx, dword[r15 + offsetof(A64JitState, fpcr)]); + // RBP = PC, RCX = PC & PcMask code.mov(rcx, A64::LocationDescriptor::pc_mask); code.and_(rcx, rbp); - code.mov(ebx, dword[r15 + offsetof(A64JitState, fpcr)]); + // RBX = ((FPCR & FpcrMask) << FpcrShift) | RCX code.and_(ebx, A64::LocationDescriptor::fpcr_mask); code.shl(rbx, A64::LocationDescriptor::fpcr_shift); code.or_(rbx, rcx); @@ -203,8 +205,8 @@ void A64EmitX64::GenTerminalHandlers() { code.align(); terminal_handler_pop_rsb_hint = code.getCurr(); + code.mov(eax, dword[r15 + offsetof(A64JitState, rsb_ptr)]); // Preload (avoid cache miss penalty) calculate_location_descriptor(); - code.mov(eax, dword[r15 + offsetof(A64JitState, rsb_ptr)]); code.dec(eax); code.and_(eax, u32(A64JitState::RSBPtrMask)); code.mov(dword[r15 + offsetof(A64JitState, rsb_ptr)], eax); @@ -428,7 +430,6 @@ void A64EmitX64::EmitA64SetD(A64EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); const A64::Vec vec = inst->GetArg(0).GetA64VecRef(); const auto addr = xword[r15 + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast(vec)]; - const Xbyak::Xmm to_store = ctx.reg_alloc.UseScratchXmm(args[1]); code.movq(to_store, to_store); // TODO: Remove when able code.movaps(addr, to_store); diff --git a/externals/dynarmic/src/dynarmic/backend/x64/emit_x64_data_processing.cpp b/externals/dynarmic/src/dynarmic/backend/x64/emit_x64_data_processing.cpp index cb1afdec9e..fc6a018f39 100644 --- a/externals/dynarmic/src/dynarmic/backend/x64/emit_x64_data_processing.cpp +++ b/externals/dynarmic/src/dynarmic/backend/x64/emit_x64_data_processing.cpp @@ -106,21 +106,23 @@ void EmitX64::EmitMostSignificantBit(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitIsZero32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32(); + const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr(); + const Xbyak::Reg64 source = ctx.reg_alloc.UseGpr(args[0]); // TODO: Flag optimization - code.test(result, result); + code.xor_(result, result); + code.test(source, source); code.sete(result.cvt8()); - code.movzx(result, result.cvt8()); ctx.reg_alloc.DefineValue(inst, result); } void EmitX64::EmitIsZero64(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(args[0]); + const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr(); + const Xbyak::Reg64 source = ctx.reg_alloc.UseGpr(args[0]); // TODO: Flag optimization - code.test(result, result); + code.xor_(result, result); + code.test(source, source); code.sete(result.cvt8()); - code.movzx(result, result.cvt8()); ctx.reg_alloc.DefineValue(inst, result); } diff --git a/externals/dynarmic/src/dynarmic/backend/x64/emit_x64_floating_point.cpp b/externals/dynarmic/src/dynarmic/backend/x64/emit_x64_floating_point.cpp index aeb4ceac3c..63b8346468 100644 --- a/externals/dynarmic/src/dynarmic/backend/x64/emit_x64_floating_point.cpp +++ b/externals/dynarmic/src/dynarmic/backend/x64/emit_x64_floating_point.cpp @@ -762,9 +762,9 @@ static void EmitFPMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { code.L(op1_done); FCODE(ucomis)(operand2, operand2); - code.jnp(op2_done); + code.jnp(op2_done, code.T_NEAR); code.ptest(operand2, xmm0); - code.jnz(op2_done); + code.jnz(op2_done, code.T_NEAR); code.vorps(result, operand2, xmm0); if constexpr (negate_product) { code.xorps(result, code.Const(xword, FP::FPInfo::sign_mask)); diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.cpp b/src/core/arm/dynarmic/arm_dynarmic_32.cpp index d21aa5aacf..b333509306 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_32.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic_32.cpp @@ -351,9 +351,8 @@ u32 ArmDynarmic32::GetSvcNumber() const { } void ArmDynarmic32::GetSvcArguments(std::span args) const { - Dynarmic::A32::Jit& j = *m_jit; - auto& gpr = j.Regs(); - + Dynarmic::A32::Jit const& j = *m_jit; + auto const& gpr = j.Regs(); for (size_t i = 0; i < 8; i++) { args[i] = gpr[i]; } @@ -362,9 +361,8 @@ void ArmDynarmic32::GetSvcArguments(std::span args) const { void ArmDynarmic32::SetSvcArguments(std::span args) { Dynarmic::A32::Jit& j = *m_jit; auto& gpr = j.Regs(); - for (size_t i = 0; i < 8; i++) { - gpr[i] = static_cast(args[i]); + gpr[i] = u32(args[i]); } }