Mirror of https://git.eden-emu.dev/eden-emu/eden.git
PORT: NCE: Initial ryujinx host mapped memory

commit 752945ee31
parent d3510b1397

4 changed files with 371 additions and 18 deletions
src/core/arm/nce/arm_nce.cpp

@ -3,7 +3,9 @@
 #include <cinttypes>
 #include <memory>
+#include <sys/mman.h>

+#include "common/logging/log.h"
 #include "common/signal_chain.h"
 #include "core/arm/nce/arm_nce.h"
 #include "core/arm/nce/interpreter_visitor.h"
@ -43,6 +45,54 @@ constexpr u32 StackSize = 128_KiB;

 } // namespace

+// Implementation of the enhanced features inspired by Ryujinx NCE
+
+void ArmNce::SetupAlternateSignalStack() {
+    // Create an alternate stack for signal handling. This ensures we have a clean
+    // stack for handling signals even if the guest stack is corrupted.
+    m_alt_signal_stack = std::make_unique<u8[]>(AlternateStackSize);
+
+    stack_t ss{};
+    ss.ss_sp = m_alt_signal_stack.get();
+    ss.ss_size = AlternateStackSize;
+    ss.ss_flags = 0;
+
+    if (sigaltstack(&ss, nullptr) != 0) {
+        LOG_ERROR(Core_ARM, "Failed to set up alternate signal stack: {}", strerror(errno));
+    } else {
+        LOG_DEBUG(Core_ARM, "Alternate signal stack set up successfully");
+    }
+}
+
+void ArmNce::CleanupAlternateSignalStack() {
+    if (m_alt_signal_stack) {
+        stack_t ss{};
+        ss.ss_flags = SS_DISABLE;
+
+        if (sigaltstack(&ss, nullptr) != 0) {
+            LOG_ERROR(Core_ARM, "Failed to disable alternate signal stack: {}", strerror(errno));
+        }
+
+        m_alt_signal_stack.reset();
+    }
+}
+
+bool ArmNce::HandleThreadInterrupt(GuestContext* ctx) {
+    // Check if an interrupt was requested
+    if (ctx->interrupt_requested.load(std::memory_order_acquire) != 0) {
+        // Clear the interrupt request
+        ctx->interrupt_requested.store(0, std::memory_order_release);
+
+        // Add the break-loop reason to indicate we should exit
+        ctx->esr_el1.fetch_or(static_cast<u64>(HaltReason::BreakLoop));
+
+        // Indicate we handled an interrupt
+        return true;
+    }
+
+    return false;
+}

 void* ArmNce::RestoreGuestContext(void* raw_context) {
     // Retrieve the host context.
     auto& host_ctx = static_cast<ucontext_t*>(raw_context)->uc_mcontext;
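Note: the two functions above only create and tear down the alternate stack; delivery onto it is opted into per signal via SA_ONSTACK. A minimal sketch of such a registration follows, with a hypothetical handler name, since this diff does not show where its handlers are installed:

#include <signal.h>

// Illustrative handler; it runs on m_alt_signal_stack because SA_ONSTACK is set,
// so it stays usable even if the guest stack pointer is corrupted at delivery.
static void BreakHandler(int sig, siginfo_t* info, void* raw_context) {}

static void InstallBreakHandler(int signum) {
    struct sigaction sa{};
    sa.sa_flags = SA_SIGINFO | SA_ONSTACK; // SA_ONSTACK routes delivery to the sigaltstack
    sa.sa_sigaction = BreakHandler;
    sigemptyset(&sa.sa_mask);
    sigaction(signum, &sa, nullptr);
}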
@ -268,9 +318,18 @@ void ArmNce::SetSvcArguments(std::span<const uint64_t, 8> args) {

 ArmNce::ArmNce(System& system, bool uses_wall_clock, std::size_t core_index)
     : ArmInterface{uses_wall_clock}, m_system{system}, m_core_index{core_index} {
     m_guest_ctx.system = &m_system;
     m_guest_ctx.parent = this;
+
+    // Initialize as being in managed code
+    m_guest_ctx.in_managed.store(1, std::memory_order_release);
 }

-ArmNce::~ArmNce() = default;
+ArmNce::~ArmNce() {
+    // Clean up alternate signal stack
+    CleanupAlternateSignalStack();
+
+    // Host mapped memory will be cleaned up by its destructor
+}

 void ArmNce::Initialize() {
     if (m_thread_id == -1) {
@ -287,6 +346,16 @@ void ArmNce::Initialize() {

         sigaltstack(&ss, nullptr);
     }

+    // Set up alternate signal stack (Ryujinx-inspired enhancement)
+    SetupAlternateSignalStack();
+
+    // Initialize host-mapped memory for efficient access
+    if (!m_host_mapped_memory) {
+        auto& memory = m_system.ApplicationMemory();
+        m_host_mapped_memory = std::make_unique<HostMappedMemory>(memory);
+        LOG_DEBUG(Core_ARM, "Initialized host-mapped memory for NCE");
+    }
+
     // Set up signals.
     static std::once_flag flag;
     std::call_once(flag, [] {
@ -365,19 +434,23 @@ void ArmNce::SetContext(const Kernel::Svc::ThreadContext& ctx) {
 }

 void ArmNce::SignalInterrupt(Kernel::KThread* thread) {
-    // Add break loop condition.
+    // Mark that we're requesting an interrupt
+    m_guest_ctx.interrupt_requested.store(1, std::memory_order_release);
+
+    // Add break loop condition
     m_guest_ctx.esr_el1.fetch_or(static_cast<u64>(HaltReason::BreakLoop));

-    // Lock the thread context.
+    // Lock the thread context
     auto* params = &thread->GetNativeExecutionParameters();
     LockThreadParameters(params);

-    if (params->is_running) {
-        // We should signal to the running thread.
-        // The running thread will unlock the thread context.
+    // Only send a signal if the thread is running and not in managed code
+    if (params->is_running && m_guest_ctx.in_managed.load(std::memory_order_acquire) == 0) {
+        // Send signal to the running thread
+        // The running thread will unlock the thread context
         syscall(SYS_tkill, m_thread_id, BreakFromRunCodeSignal);
     } else {
-        // If the thread is no longer running, we have nothing to do.
+        // If the thread is no longer running or is in managed code, we unlock
         UnlockThreadParameters(params);
     }
 }
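Note: together with HandleThreadInterrupt above, the new fields form a small handshake around each guest execution slice. A sketch of the intended flow, assuming entry/exit points that toggle in_managed (the surrounding function is illustrative, not code from this diff):

// Illustrative guest slice, using the GuestContext fields added in this commit.
void RunGuestSlice(GuestContext* ctx) {
    ctx->in_managed.store(0, std::memory_order_release); // entering native guest code
    // ... guest code runs; SignalInterrupt() may tkill this thread here ...
    ctx->in_managed.store(1, std::memory_order_release); // back in managed host code

    // An interrupt requested while we were in managed code was deliberately not
    // signalled; consume it here instead, mirroring HandleThreadInterrupt.
    if (ctx->interrupt_requested.exchange(0, std::memory_order_acq_rel) != 0) {
        ctx->esr_el1.fetch_or(static_cast<u64>(HaltReason::BreakLoop));
    }
}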
@ -402,21 +475,54 @@ void ArmNce::ClearInstructionCache() {
 }

 void ArmNce::InvalidateCacheRange(u64 addr, std::size_t size) {
 #if defined(__GNUC__) || defined(__clang__)
-    while (size > 0) {
-        const std::size_t size_step = std::min(size, CACHE_PAGE_SIZE);
-
-        // The __builtin___clear_cache intrinsic generates icache (i) invalidation and
-        // dcache (d) write-back instructions targeting the range.
-        char* addr_ptr = reinterpret_cast<char*>(addr);
-        __builtin___clear_cache(addr_ptr, addr_ptr + size_step);
-
-        addr += size_step;
-        size -= size_step;
-    }
+    // Align the start address to cache line boundary for better performance
+    const size_t CACHE_LINE_SIZE = 64;
+    addr &= ~(CACHE_LINE_SIZE - 1);
+
+    // Round up size to nearest cache line
+    size = (size + CACHE_LINE_SIZE - 1) & ~(CACHE_LINE_SIZE - 1);
+
+    // Prefetch the range to be invalidated
+    for (size_t offset = 0; offset < size; offset += CACHE_LINE_SIZE) {
+        __builtin_prefetch((void*)(addr + offset), 1, 3);
+    }
 #endif
+
+    // Clear instruction cache after range invalidation
     this->ClearInstructionCache();
 }
+
+// Fast memory access template implementation (inspired by Ryujinx)
+template <typename T>
+T& ArmNce::GetHostRef(u64 guest_addr) {
+    if (m_host_mapped_memory) {
+        // Use the host-mapped memory for fast access
+        try {
+            return m_host_mapped_memory->GetRef<T>(guest_addr);
+        } catch (const std::exception& e) {
+            LOG_ERROR(Core_ARM, "Failed to get host reference: {}", e.what());
+        }
+    }
+
+    // Fall back to slower memory access
+    T value{};
+    m_system.ApplicationMemory().ReadBlock(guest_addr, &value, sizeof(T));
+    static thread_local T fallback;
+    fallback = value;
+    return fallback;
+}
+
+// Explicit instantiations for common types
+template u8& ArmNce::GetHostRef<u8>(u64);
+template u16& ArmNce::GetHostRef<u16>(u64);
+template u32& ArmNce::GetHostRef<u32>(u64);
+template u64& ArmNce::GetHostRef<u64>(u64);
+template s8& ArmNce::GetHostRef<s8>(u64);
+template s16& ArmNce::GetHostRef<s16>(u64);
+template s32& ArmNce::GetHostRef<s32>(u64);
+template s64& ArmNce::GetHostRef<s64>(u64);
+template f32& ArmNce::GetHostRef<f32>(u64);
+template f64& ArmNce::GetHostRef<f64>(u64);

 } // namespace Core
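Note: the explicit instantiations above are required because GetHostRef<T> is declared in arm_nce.h but defined here; a T outside this list would fail at link time. Also visible in the code: the slow-path fallback returns a reference to a thread_local copy, so writes through that reference never reach guest memory. A hedged sketch of an explicit write-back helper a caller could pair with it (WriteGuestValue is hypothetical, not part of this commit):

#include <type_traits>

// Hypothetical helper: write a value back to guest memory explicitly instead of
// writing through the slow-path fallback reference, whose writes are lost.
template <typename T>
void WriteGuestValue(Core::Memory::Memory& memory, u64 guest_addr, const T& value) {
    static_assert(std::is_trivially_copyable_v<T>, "T must be trivially copyable");
    memory.WriteBlock(guest_addr, &value, sizeof(T));
}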
src/core/arm/nce/arm_nce.h

@ -7,6 +7,7 @@
 #include "core/arm/arm_interface.h"
 #include "core/arm/nce/guest_context.h"
+#include "core/arm/nce/host_mapped_memory.h"

 namespace Core::Memory {
 class Memory;

@ -52,6 +53,10 @@ protected:

     void RewindBreakpointInstruction() override {}

+    // Fast memory access using host-mapped memory (inspired by Ryujinx)
+    template <typename T>
+    T& GetHostRef(u64 guest_addr);
+
 private:
     // Assembly definitions.
     static HaltReason ReturnToRunCodeByTrampoline(void* tpidr, GuestContext* ctx,

@ -67,6 +72,13 @@ private:

     static void LockThreadParameters(void* tpidr);
     static void UnlockThreadParameters(void* tpidr);

+    // Alternate stack management (inspired by Ryujinx)
+    void SetupAlternateSignalStack();
+    void CleanupAlternateSignalStack();
+
+    // Enhanced signal handling
+    static bool HandleThreadInterrupt(GuestContext* ctx);
+
 private:
     // C++ implementation functions for assembly definitions.
     static void* RestoreGuestContext(void* raw_context);

@ -90,6 +102,13 @@ public:

     // Stack for signal processing.
     std::unique_ptr<u8[]> m_stack{};

+    // Alternate signal stack (inspired by Ryujinx)
+    static constexpr size_t AlternateStackSize = 16384;
+    std::unique_ptr<u8[]> m_alt_signal_stack{};
+
+    // Host mapped memory for efficient access (inspired by Ryujinx)
+    std::unique_ptr<HostMappedMemory> m_host_mapped_memory{};
 };

 } // namespace Core
src/core/arm/nce/guest_context.h

@ -38,6 +38,12 @@ struct GuestContext {
     u32 svc{};
     System* system{};
     ArmNce* parent{};
+
+    // Enhanced thread control (inspired by Ryujinx)
+    std::atomic<u32> in_managed{1};          // 1 when in managed code, 0 when in native
+    std::atomic<u32> interrupt_requested{0}; // Set when an interrupt is requested
+    pid_t host_thread_id{-1};                // Host thread ID for signaling
+    u64 ctr_el0{0x8444c004};                 // Cache type register
 };

 // Verify assembly offsets.
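Note: for reference, 0x8444c004 decodes as follows under the standard AArch64 CTR_EL0 field layout (my annotation, not part of the diff):

// ctr_el0 = 0x8444c004:
//   bit 31          = 1    RES1
//   IminLine[3:0]   = 4    minimum I-cache line of 2^4 words = 64 bytes
//   L1Ip[15:14]     = 0b11 PIPT instruction cache
//   DminLine[19:16] = 4    minimum D-cache line of 64 bytes
//   ERG[23:20]      = 4    exclusives reservation granule of 64 bytes
//   CWG[27:24]      = 4    cache writeback granule of 64 bytes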
src/core/arm/nce/host_mapped_memory.h (new file, 222 lines)

@ -0,0 +1,222 @@
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#pragma once

#include <array>
#include <cerrno>
#include <cstring>
#include <span>
#include <stdexcept>
#include <string>
#include <sys/mman.h>
#include <type_traits>
#include <unordered_map>
#include <vector>

#include <fmt/format.h>

#include "common/common_types.h"
#include "common/logging/log.h"
#include "common/page_table.h"
#include "core/memory.h"

namespace Core {

/**
 * HostMappedMemory provides direct host-mapped memory access for NCE.
 * This is inspired by Ryujinx's MemoryManagerNative for faster memory operations.
 */
class HostMappedMemory {
public:
    explicit HostMappedMemory(Memory::Memory& memory) : memory{memory} {}

    ~HostMappedMemory() {
        // Unmap all allocations
        for (void* allocation : allocations) {
            if (munmap(allocation, page_size) != 0) {
                LOG_ERROR(Core_ARM, "Failed to unmap allocation at {:p}: {}", allocation,
                          std::strerror(errno));
            }
        }
    }

    /**
     * Maps a guest memory region to host memory.
     * @param guest_addr Guest virtual address to map
     * @param size Size of the region to map
     * @return True if the mapping succeeded
     */
    bool MapRegion(u64 guest_addr, u64 size) {
        const u64 start_page = guest_addr >> page_bits;
        const u64 end_page = (guest_addr + size + page_mask) >> page_bits;

        for (u64 page = start_page; page < end_page; page++) {
            const u64 current_addr = page << page_bits;

            // Skip if already mapped
            if (page_table.contains(page)) {
                continue;
            }

            // Allocate memory for this page
            void* allocation = mmap(nullptr, page_size, PROT_READ | PROT_WRITE,
                                    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
            if (allocation == MAP_FAILED) {
                LOG_ERROR(Core_ARM, "Failed to allocate host page for guest address {:016X}: {}",
                          current_addr, std::strerror(errno));
                return false;
            }

            // Try to read the page from guest memory
            std::array<u8, page_size> data;
            if (!memory.ReadBlock(current_addr, data.data(), page_size)) {
                LOG_ERROR(Core_ARM, "Failed to read memory block at {:016X}", current_addr);
                munmap(allocation, page_size);
                return false;
            }

            // Copy to our allocation
            std::memcpy(allocation, data.data(), page_size);

            // Store the allocation
            page_table[page] = static_cast<u8*>(allocation);
            allocations.push_back(allocation);
        }

        return true;
    }

    /**
     * Unmaps a previously mapped guest memory region.
     * @param guest_addr Guest virtual address to unmap
     * @param size Size of the region to unmap
     */
    void UnmapRegion(u64 guest_addr, u64 size) {
        const u64 start_page = guest_addr >> page_bits;
        const u64 end_page = (guest_addr + size + page_mask) >> page_bits;

        for (u64 page = start_page; page < end_page; page++) {
            // Skip if not mapped
            const auto it = page_table.find(page);
            if (it == page_table.end()) {
                continue;
            }

            // Write the page contents back to guest memory
            const u64 current_addr = page << page_bits;
            memory.WriteBlock(current_addr, it->second, page_size);

            // Remove from the page table. The host allocation is deliberately not
            // unmapped here; it is released in the destructor to avoid potential
            // reuse problems.
            page_table.erase(it);
        }
    }

    /**
     * Gets a typed reference to memory at the specified guest address.
     * @tparam T Type of the reference to return
     * @param guest_addr Guest virtual address
     * @return Reference to the memory at the specified address
     * @note The memory region must be contiguous and mapped.
     */
    template <typename T>
    T& GetRef(u64 guest_addr) {
        static_assert(std::is_trivially_copyable_v<T>, "T must be trivially copyable");

        // Check whether the value fits within a single page
        const u64 page_offset = guest_addr & page_mask;
        if (page_offset + sizeof(T) > page_size) {
            // Slow path - the value spans pages, so the whole range must be mapped
            // and host-contiguous
            if (!IsRangeMapped(guest_addr, sizeof(T))) {
                throw std::runtime_error("Memory region is not contiguous");
            }
        }
        // Fast path - a mapped single page, or a verified contiguous range
        return *reinterpret_cast<T*>(TranslateAddress(guest_addr));
    }

    /**
     * Gets a span over memory at the specified guest address.
     * @param guest_addr Guest virtual address
     * @param size Size of the span
     * @return Span over the memory at the specified address
     * @note The memory region must be contiguous and mapped.
     */
    std::span<u8> GetSpan(u64 guest_addr, u64 size) {
        // Ensure the memory is mapped and contiguous
        if (!IsRangeMapped(guest_addr, size)) {
            throw std::runtime_error(
                "GetSpan requested on an unmapped or non-contiguous memory region");
        }

        return std::span<u8>(TranslateAddress(guest_addr), size);
    }

    /**
     * Checks if an address is mapped.
     * @param guest_addr Guest virtual address to check
     * @return True if the address is mapped
     */
    bool IsMapped(u64 guest_addr) const {
        const u64 page = guest_addr >> page_bits;
        return page_table.contains(page);
    }

    /**
     * Checks if a range of memory is mapped and host-contiguous.
     * @param guest_addr Starting guest virtual address
     * @param size Size of the region to check
     * @return True if the entire range is mapped contiguously
     */
    bool IsRangeMapped(u64 guest_addr, u64 size) const {
        const u64 start_page = guest_addr >> page_bits;
        const u64 end_page = (guest_addr + size + page_mask) >> page_bits;

        auto prev = page_table.find(start_page);
        if (prev == page_table.end()) {
            return false;
        }
        for (u64 page = start_page + 1; page < end_page; page++) {
            const auto it = page_table.find(page);
            // Pages are allocated individually with mmap, so consecutive guest pages
            // only form a usable range if their host pages happen to be adjacent.
            if (it == page_table.end() || it->second != prev->second + page_size) {
                return false;
            }
            prev = it;
        }

        return true;
    }

    /**
     * Gets the host address for a guest virtual address.
     * @param guest_addr Guest virtual address to translate
     * @return Host address corresponding to the guest address
     */
    u8* TranslateAddress(u64 guest_addr) {
        const u64 page = guest_addr >> page_bits;
        const u64 offset = guest_addr & page_mask;

        const auto it = page_table.find(page);
        if (it == page_table.end()) {
            throw std::runtime_error(
                fmt::format("Tried to translate unmapped address {:016X}", guest_addr));
        }

        return it->second + offset;
    }

private:
    static constexpr u64 page_bits = 12;
    static constexpr u64 page_size = 1ULL << page_bits;
    static constexpr u64 page_mask = page_size - 1;

    Memory::Memory& memory;

    // Page table mapping guest pages to host addresses
    std::unordered_map<u64, u8*> page_table;

    // Allocation pool for mapped regions
    std::vector<void*> allocations;
};

} // namespace Core
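Note: MapRegion copies guest pages into private anonymous host pages rather than aliasing guest memory, so the class behaves as a snapshot: guest-side stores made after MapRegion are not visible through GetRef, and host-side stores only reach the guest when UnmapRegion writes the pages back. A short usage sketch under that assumption (addresses are illustrative):

// Illustrative usage; `memory` would be the application's Core::Memory::Memory.
void Example(Core::Memory::Memory& memory) {
    Core::HostMappedMemory hmm{memory};

    constexpr u64 guest_addr = 0x8000000;     // hypothetical guest VA
    if (!hmm.MapRegion(guest_addr, 0x2000)) { // snapshot two pages
        return;
    }

    u32& word = hmm.GetRef<u32>(guest_addr + 0x10); // fast typed access to the copy
    word = 0xDEADBEEF;                              // modifies the host copy only

    // Write the (possibly modified) pages back to guest memory.
    hmm.UnmapRegion(guest_addr, 0x2000);
}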