PORT: NCE: Initial ryujinx host mapped memory

This commit is contained in:
MrPurple666 2025-05-19 00:36:07 -03:00
parent d3510b1397
commit 752945ee31
4 changed files with 371 additions and 18 deletions

View file

@@ -3,7 +3,9 @@
#include <cinttypes>
#include <memory>
#include <sys/mman.h>
#include "common/logging/log.h"
#include "common/signal_chain.h"
#include "core/arm/nce/arm_nce.h"
#include "core/arm/nce/interpreter_visitor.h"
@@ -43,6 +45,54 @@ constexpr u32 StackSize = 128_KiB;
} // namespace
// Implementation of the enhanced features inspired by Ryujinx NCE
void ArmNce::SetupAlternateSignalStack() {
    // Create an alternate stack for signal handling
    // This ensures we have a clean stack for handling signals even if the guest stack is corrupted
    m_alt_signal_stack = std::make_unique<u8[]>(AlternateStackSize);

    stack_t ss{};
    ss.ss_sp = m_alt_signal_stack.get();
    ss.ss_size = AlternateStackSize;
    ss.ss_flags = 0;

    if (sigaltstack(&ss, nullptr) != 0) {
        LOG_ERROR(Core_ARM, "Failed to setup alternate signal stack: {}", strerror(errno));
    } else {
        LOG_DEBUG(Core_ARM, "Alternate signal stack set up successfully");
    }
}

void ArmNce::CleanupAlternateSignalStack() {
    if (m_alt_signal_stack) {
        stack_t ss{};
        ss.ss_flags = SS_DISABLE;
        if (sigaltstack(&ss, nullptr) != 0) {
            LOG_ERROR(Core_ARM, "Failed to disable alternate signal stack: {}", strerror(errno));
        }
        m_alt_signal_stack.reset();
    }
}
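Note that sigaltstack() only registers the alternate stack; a handler actually runs on it only when it is installed with SA_ONSTACK. A minimal sketch of the kind of registration this relies on (the helper name and the particular signal are illustrative, not taken from this commit):

// Illustrative only: install a handler so it runs on the stack registered above.
// Requires <csignal> / <signal.h>.
static void InstallHandlerOnAltStack(int signum, void (*handler)(int, siginfo_t*, void*)) {
    struct sigaction action{};
    action.sa_sigaction = handler;
    // SA_ONSTACK tells the kernel to switch to the sigaltstack() stack before invoking
    // the handler, so a corrupted guest stack cannot take the handler down with it.
    action.sa_flags = SA_SIGINFO | SA_ONSTACK;
    sigemptyset(&action.sa_mask);
    sigaction(signum, &action, nullptr);
}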
bool ArmNce::HandleThreadInterrupt(GuestContext* ctx) {
    // Check if an interrupt was requested
    if (ctx->interrupt_requested.load(std::memory_order_acquire) != 0) {
        // Clear the interrupt request
        ctx->interrupt_requested.store(0, std::memory_order_release);

        // Add break loop reason to indicate we should exit
        ctx->esr_el1.fetch_or(static_cast<u64>(HaltReason::BreakLoop));

        // Indicate we handled an interrupt
        return true;
    }
    return false;
}
void* ArmNce::RestoreGuestContext(void* raw_context) {
    // Retrieve the host context.
    auto& host_ctx = static_cast<ucontext_t*>(raw_context)->uc_mcontext;
@@ -268,9 +318,18 @@ void ArmNce::SetSvcArguments(std::span<const uint64_t, 8> args) {
ArmNce::ArmNce(System& system, bool uses_wall_clock, std::size_t core_index)
    : ArmInterface{uses_wall_clock}, m_system{system}, m_core_index{core_index} {
    m_guest_ctx.system = &m_system;
    m_guest_ctx.parent = this;

    // Initialize as being in managed code
    m_guest_ctx.in_managed.store(1, std::memory_order_release);
}

ArmNce::~ArmNce() {
    // Clean up alternate signal stack
    CleanupAlternateSignalStack();

    // Host mapped memory will be cleaned up by its destructor
}
void ArmNce::Initialize() {
    if (m_thread_id == -1) {
@@ -287,6 +346,16 @@ void ArmNce::Initialize() {
        sigaltstack(&ss, nullptr);
    }
    // Set up alternate signal stack (Ryujinx-inspired enhancement)
    SetupAlternateSignalStack();

    // Initialize host-mapped memory for efficient access
    if (!m_host_mapped_memory) {
        auto& memory = m_system.ApplicationMemory();
        m_host_mapped_memory = std::make_unique<HostMappedMemory>(memory);
        LOG_DEBUG(Core_ARM, "Initialized host-mapped memory for NCE");
    }
    // Set up signals.
    static std::once_flag flag;
    std::call_once(flag, [] {
@@ -365,19 +434,23 @@ void ArmNce::SetContext(const Kernel::Svc::ThreadContext& ctx) {
}
void ArmNce::SignalInterrupt(Kernel::KThread* thread) {
    // Mark that we're requesting an interrupt
    m_guest_ctx.interrupt_requested.store(1, std::memory_order_release);

    // Add break loop condition
    m_guest_ctx.esr_el1.fetch_or(static_cast<u64>(HaltReason::BreakLoop));

    // Lock the thread context
    auto* params = &thread->GetNativeExecutionParameters();
    LockThreadParameters(params);

    // Only send a signal if the thread is running and not in managed code
    if (params->is_running && m_guest_ctx.in_managed.load(std::memory_order_acquire) == 0) {
        // Send signal to the running thread
        // The running thread will unlock the thread context
        syscall(SYS_tkill, m_thread_id, BreakFromRunCodeSignal);
    } else {
        // If the thread is no longer running or is in managed code, we unlock
        UnlockThreadParameters(params);
    }
}
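The in_managed / interrupt_requested pair is a small handshake: SignalInterrupt only raises BreakFromRunCodeSignal while the thread is executing native guest code, and a thread returning to managed code is expected to pick up pending requests by polling. A rough sketch of the run-loop side of that handshake, under the assumption that it brackets native execution (the function below is hypothetical; only GuestContext, HaltReason, and the check performed by HandleThreadInterrupt come from this commit):

// Hypothetical run-loop excerpt, not part of this commit.
HaltReason RunGuestCodeSketch(GuestContext& ctx) {
    // Leaving managed code: from here on, SignalInterrupt may deliver a signal.
    ctx.in_managed.store(0, std::memory_order_release);

    // ... guest code executes natively until a halt reason is recorded ...

    // Back in managed code: interrupts are now consumed by polling rather than signals.
    ctx.in_managed.store(1, std::memory_order_release);

    // Same check HandleThreadInterrupt performs: turn a pending request into a BreakLoop.
    if (ctx.interrupt_requested.exchange(0, std::memory_order_acq_rel) != 0) {
        ctx.esr_el1.fetch_or(static_cast<u64>(HaltReason::BreakLoop));
    }

    return static_cast<HaltReason>(ctx.esr_el1.exchange(0));
}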
@@ -402,21 +475,54 @@ void ArmNce::ClearInstructionCache() {
}
void ArmNce::InvalidateCacheRange(u64 addr, std::size_t size) {
#if defined(__GNUC__) || defined(__clang__)
    while (size > 0) {
        const std::size_t size_step = std::min(size, CACHE_PAGE_SIZE);

        // The __builtin___clear_cache intrinsic generates icache(i) invalidation and dcache(d)
        // write-back instructions targeting the range.
        char* addr_ptr = reinterpret_cast<char*>(addr);
        __builtin___clear_cache(addr_ptr, addr_ptr + size_step);

        addr += size_step;
        size -= size_step;
    }

    // Clear instruction cache after range invalidation
    this->ClearInstructionCache();
#endif
}
// Fast memory access template implementation (inspired by Ryujinx)
template <typename T>
T& ArmNce::GetHostRef(u64 guest_addr) {
    if (m_host_mapped_memory) {
        // Use the host-mapped memory for fast access
        try {
            return m_host_mapped_memory->GetRef<T>(guest_addr);
        } catch (const std::exception& e) {
            LOG_ERROR(Core_ARM, "Failed to get host reference: {}", e.what());
        }
    }

    // Fallback to slower memory access. Note that this returns a reference to a
    // thread-local copy, so writes through it do not reach guest memory.
    static thread_local T fallback{};
    m_system.ApplicationMemory().ReadBlock(guest_addr, &fallback, sizeof(T));
    return fallback;
}

// Explicit instantiations for common types
template u8& ArmNce::GetHostRef<u8>(u64);
template u16& ArmNce::GetHostRef<u16>(u64);
template u32& ArmNce::GetHostRef<u32>(u64);
template u64& ArmNce::GetHostRef<u64>(u64);
template s8& ArmNce::GetHostRef<s8>(u64);
template s16& ArmNce::GetHostRef<s16>(u64);
template s32& ArmNce::GetHostRef<s32>(u64);
template s64& ArmNce::GetHostRef<s64>(u64);
template f32& ArmNce::GetHostRef<f32>(u64);
template f64& ArmNce::GetHostRef<f64>(u64);
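A short usage sketch for the helper above (the calling function is hypothetical); the main caveat is that only the host-mapped path yields a reference into real guest memory:

// Hypothetical caller; only GetHostRef itself comes from this commit.
void ArmNce::TouchGuestWordSketch(u64 guest_addr) {
    // Fast path: a direct reference into the host-mapped page backing guest_addr.
    u32& word = GetHostRef<u32>(guest_addr);

    // Visible to the guest only if the address was host-mapped; the fallback path
    // returns a thread-local copy, so writes through it are not propagated.
    word += 1;
}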
} // namespace Core

View file

@@ -7,6 +7,7 @@
#include "core/arm/arm_interface.h"
#include "core/arm/nce/guest_context.h"
#include "core/arm/nce/host_mapped_memory.h"

namespace Core::Memory {
class Memory;
@@ -52,6 +53,10 @@ protected:
    void RewindBreakpointInstruction() override {}

    // Fast memory access using host-mapped memory (inspired by Ryujinx)
    template <typename T>
    T& GetHostRef(u64 guest_addr);
private:
    // Assembly definitions.
    static HaltReason ReturnToRunCodeByTrampoline(void* tpidr, GuestContext* ctx,
@@ -67,6 +72,13 @@ private:
    static void LockThreadParameters(void* tpidr);
    static void UnlockThreadParameters(void* tpidr);

    // Alternate stack management (inspired by Ryujinx)
    void SetupAlternateSignalStack();
    void CleanupAlternateSignalStack();

    // Enhanced signal handling
    static bool HandleThreadInterrupt(GuestContext* ctx);
private:
    // C++ implementation functions for assembly definitions.
    static void* RestoreGuestContext(void* raw_context);
@@ -90,6 +102,13 @@ public:
    // Stack for signal processing.
    std::unique_ptr<u8[]> m_stack{};

    // Alternate signal stack (inspired by Ryujinx)
    static constexpr size_t AlternateStackSize = 16384;
    std::unique_ptr<u8[]> m_alt_signal_stack{};

    // Host mapped memory for efficient access (inspired by Ryujinx)
    std::unique_ptr<HostMappedMemory> m_host_mapped_memory{};
};

} // namespace Core

View file

@@ -38,6 +38,12 @@ struct GuestContext {
    u32 svc{};
    System* system{};
    ArmNce* parent{};

    // Enhanced thread control (inspired by Ryujinx)
    std::atomic<u32> in_managed{1};          // 1 when in managed code, 0 when in native
    std::atomic<u32> interrupt_requested{0}; // Set when interrupt requested
    pid_t host_thread_id{-1};                // Host thread ID for signaling
    u64 ctr_el0{0x8444c004};                 // Cache type register
};

// Verify assembly offsets.

View file

@@ -0,0 +1,222 @@
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once

#include <array>
#include <cerrno>
#include <cstring>
#include <span>
#include <stdexcept>
#include <string>
#include <sys/mman.h>
#include <type_traits>
#include <unordered_map>
#include <vector>

#include "common/common_types.h"
#include "common/logging/log.h"
#include "common/page_table.h"
namespace Core::Memory {
class Memory;
}
namespace Core {
/**
* HostMappedMemory provides direct host-mapped memory access for NCE
* This is inspired by Ryujinx's MemoryManagerNative for faster memory operations
*/
class HostMappedMemory {
public:
    explicit inline HostMappedMemory(Memory::Memory& memory) : memory{memory} {}

    inline ~HostMappedMemory() {
        // Unmap all allocations
        for (void* allocation : allocations) {
            if (munmap(allocation, page_size) != 0) {
                LOG_ERROR(Core_ARM, "Failed to unmap allocation at {:p}: {}", allocation,
                          std::strerror(errno));
            }
        }
    }
    /**
     * Maps a guest memory region to host memory
     * @param guest_addr Guest virtual address to map
     * @param size Size of the region to map
     * @return True if the mapping succeeded
     */
    inline bool MapRegion(u64 guest_addr, u64 size) {
        const u64 start_page = guest_addr >> page_bits;
        const u64 end_page = (guest_addr + size + page_mask) >> page_bits;

        for (u64 page = start_page; page < end_page; page++) {
            const u64 current_addr = page << page_bits;

            // Skip if already mapped
            if (page_table.contains(page)) {
                continue;
            }

            // Allocate memory for this page
            void* allocation = mmap(nullptr, page_size, PROT_READ | PROT_WRITE,
                                    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
            if (allocation == MAP_FAILED) {
                LOG_ERROR(Core_ARM, "Failed to allocate host page for guest address {:016X}: {}",
                          current_addr, std::strerror(errno));
                return false;
            }

            // Copy data from guest memory to our allocation
            std::array<u8, page_size> data;

            // Try to read the memory from guest
            const bool result = memory.ReadBlock(current_addr, data.data(), page_size);
            if (!result) {
                LOG_ERROR(Core_ARM, "Failed to read memory block at {:016X}", current_addr);
                munmap(allocation, page_size);
                return false;
            }

            // Copy to our allocation
            std::memcpy(allocation, data.data(), page_size);

            // Store the allocation
            page_table[page] = static_cast<u8*>(allocation);
            allocations.push_back(allocation);
        }

        return true;
    }
    /**
     * Unmaps a previously mapped guest memory region
     * @param guest_addr Guest virtual address to unmap
     * @param size Size of the region to unmap
     */
    inline void UnmapRegion(u64 guest_addr, u64 size) {
        const u64 start_page = guest_addr >> page_bits;
        const u64 end_page = (guest_addr + size + page_mask) >> page_bits;

        for (u64 page = start_page; page < end_page; page++) {
            // Skip if not mapped
            auto it = page_table.find(page);
            if (it == page_table.end()) {
                continue;
            }

            u8* host_ptr = it->second;

            // Try to write the memory back to guest
            const u64 current_addr = page << page_bits;
            memory.WriteBlock(current_addr, host_ptr, page_size);

            // Remove from page table
            page_table.erase(it);

            // Don't unmap immediately - we'll do that in the destructor
            // to avoid potential reuse problems
        }
    }
    /**
     * Gets a typed reference to memory at the specified guest address
     * @tparam T Type of the reference to return
     * @param guest_addr Guest virtual address
     * @return Reference to the memory at the specified address
     * @note The value must lie within a single mapped page; each guest page is backed by
     *       its own host allocation, so references cannot span a page boundary
     */
    template <typename T>
    T& GetRef(u64 guest_addr) {
        static_assert(std::is_trivially_copyable_v<T>, "T must be trivially copyable");

        // Reject values that would straddle a page boundary: adjacent guest pages are not
        // adjacent in host memory, so a spanning reference would read out of bounds.
        const u64 page_offset = guest_addr & page_mask;
        if (page_offset + sizeof(T) > page_size) {
            throw std::runtime_error("GetRef requested on a value spanning a page boundary");
        }

        // TranslateAddress throws if the page is not mapped.
        return *reinterpret_cast<T*>(TranslateAddress(guest_addr));
    }
    /**
     * Gets a span over memory at the specified guest address
     * @param guest_addr Guest virtual address
     * @param size Size of the span
     * @return Span over the memory at the specified address
     * @note The range must be mapped and must not cross a page boundary, since host pages
     *       are separate allocations and are not contiguous
     */
    inline std::span<u8> GetSpan(u64 guest_addr, u64 size) {
        const u64 page_offset = guest_addr & page_mask;
        if (page_offset + size > page_size) {
            throw std::runtime_error("GetSpan requested on a range spanning a page boundary");
        }
        if (!IsMapped(guest_addr)) {
            throw std::runtime_error("GetSpan requested on unmapped memory region");
        }
        return std::span<u8>(TranslateAddress(guest_addr), size);
    }
    /**
     * Checks if an address is mapped
     * @param guest_addr Guest virtual address to check
     * @return True if the address is mapped
     */
    inline bool IsMapped(u64 guest_addr) const {
        const u64 page = guest_addr >> page_bits;
        return page_table.contains(page);
    }
    /**
     * Checks whether every page in a guest memory range is mapped
     * @param guest_addr Starting guest virtual address
     * @param size Size of the region to check
     * @return True if every page in the range is mapped
     * @note Mapped pages are backed by separate host allocations, so a mapped range is
     *       not necessarily contiguous in host memory
     */
    inline bool IsRangeMapped(u64 guest_addr, u64 size) const {
        const u64 start_page = guest_addr >> page_bits;
        const u64 end_page = (guest_addr + size + page_mask) >> page_bits;

        for (u64 page = start_page; page < end_page; page++) {
            if (!page_table.contains(page)) {
                return false;
            }
        }
        return true;
    }
    /**
     * Gets the host address for a guest virtual address
     * @param guest_addr Guest virtual address to translate
     * @return Host address corresponding to the guest address
     */
    inline u8* TranslateAddress(u64 guest_addr) {
        const u64 page = guest_addr >> page_bits;
        const u64 offset = guest_addr & page_mask;

        auto it = page_table.find(page);
        if (it == page_table.end()) {
            throw std::runtime_error(
                fmt::format("Tried to translate unmapped address {:016X}", guest_addr));
        }
        return it->second + offset;
    }
private:
    static constexpr u64 page_bits = 12;
    static constexpr u64 page_size = 1ULL << page_bits;
    static constexpr u64 page_mask = page_size - 1;

    Memory::Memory& memory;

    // Page table mapping guest pages to host addresses
    std::unordered_map<u64, u8*> page_table;

    // Allocation pool for mapped regions
    std::vector<void*> allocations;
};
} // namespace Core
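Taken together, the intended call pattern for HostMappedMemory appears to be map, access, then unmap (which writes the pages back to guest memory). A small usage sketch under that assumption; the function, address, and size below are illustrative:

// Illustrative only: exercise the HostMappedMemory API defined above.
void HostMappedMemoryUsageSketch(Core::Memory::Memory& guest_memory) {
    Core::HostMappedMemory mapped{guest_memory};

    constexpr u64 guest_addr = 0x8000000; // hypothetical guest virtual address
    constexpr u64 region_size = 0x2000;   // two 4 KiB pages

    // Copy the guest pages into private host allocations.
    if (!mapped.MapRegion(guest_addr, region_size)) {
        return;
    }

    // Read and write through a direct host reference (stays within one page).
    u32& value = mapped.GetRef<u32>(guest_addr + 0x10);
    value = 0xDEADBEEF;

    // UnmapRegion copies the modified host pages back into guest memory.
    mapped.UnmapRegion(guest_addr, region_size);
}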