From fa600b88b180bc3de4009c1ee771f2cc1a481730 Mon Sep 17 00:00:00 2001 From: CamilleLaVey Date: Mon, 30 Jun 2025 12:57:21 +0000 Subject: [PATCH] revert 91a662431cfe419bd46b6ca4ed298a4ba70d7820 (#240) revert [Texture_cache] Better memory handling for devices with lower memory allocations (#233) Means games like Minecraft Dungeons, Sea of Stars, Luigi Mansion 2, Astroneer, Alan Wake, etc are now playable. It also cleans up the recent abi.cpp and bindless texture commits a bit. Everything is in #ifdef ANDROID - The biggest change is CACHING_PAGEBITS = 12. Without that the way the buffercache grows and joins buffers can cause Android to run out of memory (as you end up with just one big buffer that needs to be copied every time it grows) Also patches up ffmpeg issues. Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/233 Co-authored-by: JPikachu Co-committed-by: JPikachu The reverted change showed regressions on devices with higher specifications; it will be refined and reintroduced as a toggle in a later commit. 
Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/240 --- .../dynarmic/src/dynarmic/backend/x64/abi.cpp | 25 ++++++++++++++++--- src/core/arm/nce/arm_nce.cpp | 2 +- src/core/hle/service/sockets/sfdnsres.cpp | 14 ++--------- .../maxwell/translate/impl/warp_shuffle.cpp | 17 +------------ src/shader_recompiler/ir_opt/texture_pass.cpp | 20 +++++++++++++-- src/video_core/buffer_cache/buffer_cache.h | 10 +------- .../buffer_cache/buffer_cache_base.h | 17 +------------ src/video_core/cdma_pusher.cpp | 3 --- src/video_core/delayed_destruction_ring.h | 1 + src/video_core/host1x/ffmpeg/ffmpeg.cpp | 19 +++++--------- src/video_core/host1x/ffmpeg/ffmpeg.h | 1 - .../renderer_vulkan/present/layer.cpp | 1 - src/video_core/texture_cache/texture_cache.h | 3 --- .../texture_cache/texture_cache_base.h | 11 -------- .../vulkan_common/vulkan_memory_allocator.cpp | 5 +--- 15 files changed, 53 insertions(+), 96 deletions(-) diff --git a/externals/dynarmic/src/dynarmic/backend/x64/abi.cpp b/externals/dynarmic/src/dynarmic/backend/x64/abi.cpp index c97d98d9ae..7cbe92aaf3 100644 --- a/externals/dynarmic/src/dynarmic/backend/x64/abi.cpp +++ b/externals/dynarmic/src/dynarmic/backend/x64/abi.cpp @@ -116,13 +116,18 @@ void ABI_PopCallerSaveRegistersAndAdjustStack(BlockOfCode& code, const std::size ABI_PopRegistersAndAdjustStack(code, frame_size, ABI_ALL_CALLER_SAVE); } -static std::vector ABI_AllCallerSaveExcept(const std::size_t except) noexcept { - std::vector arr; - std::remove_copy(ABI_ALL_CALLER_SAVE.begin(), ABI_ALL_CALLER_SAVE.end(), std::back_inserter(arr), static_cast(except)); +static consteval size_t ABI_AllCallerSaveSize() noexcept { + return ABI_ALL_CALLER_SAVE.max_size(); +} +static consteval std::array ABI_AllCallerSaveExcept(const std::size_t except) noexcept { + std::array arr; + for(std::size_t i = 0; i < arr.size(); ++i) { + arr[i] = static_cast(i + (i >= except ? 
1 : 0)); + } return arr; } -alignas(64) static std::vector ABI_CALLER_SAVED_EXCEPT_TABLE[32] = { +alignas(64) static constinit std::array ABI_CALLER_SAVED_EXCEPT_TABLE[32] = { ABI_AllCallerSaveExcept(0), ABI_AllCallerSaveExcept(1), ABI_AllCallerSaveExcept(2), @@ -158,12 +163,24 @@ alignas(64) static std::vector ABI_CALLER_SAVED_EXCEPT_TABLE[32] = { }; void ABI_PushCallerSaveRegistersAndAdjustStackExcept(BlockOfCode& code, const HostLoc exception) { +#ifdef _MSC_VER + std::vector regs; + std::remove_copy(ABI_ALL_CALLER_SAVE.begin(), ABI_ALL_CALLER_SAVE.end(), std::back_inserter(regs), exception); + ABI_PushRegistersAndAdjustStack(code, 0, regs); +#else ASSUME(size_t(exception) < 32); ABI_PushRegistersAndAdjustStack(code, 0, ABI_CALLER_SAVED_EXCEPT_TABLE[size_t(exception)]); +#endif } void ABI_PopCallerSaveRegistersAndAdjustStackExcept(BlockOfCode& code, const HostLoc exception) { +#ifdef _MSC_VER + std::vector regs; + std::remove_copy(ABI_ALL_CALLER_SAVE.begin(), ABI_ALL_CALLER_SAVE.end(), std::back_inserter(regs), exception); + ABI_PopRegistersAndAdjustStack(code, 0, regs); +#else ASSUME(size_t(exception) < 32); ABI_PopRegistersAndAdjustStack(code, 0, ABI_CALLER_SAVED_EXCEPT_TABLE[size_t(exception)]); +#endif } } // namespace Dynarmic::Backend::X64 diff --git a/src/core/arm/nce/arm_nce.cpp b/src/core/arm/nce/arm_nce.cpp index e63cf82cc5..90891e241d 100644 --- a/src/core/arm/nce/arm_nce.cpp +++ b/src/core/arm/nce/arm_nce.cpp @@ -224,7 +224,7 @@ HaltReason ArmNce::RunThread(Kernel::KThread* thread) { if (auto it = post_handlers.find(m_guest_ctx.pc); it != post_handlers.end()) { hr = ReturnToRunCodeByTrampoline(thread_params, &m_guest_ctx, it->second); } else { - hr = ReturnToRunCodeByExceptionLevelChange(m_thread_id, thread_params); // Android: Use "process handle SIGUSR2 -n true -p true -s false" (and SIGURG) in LLDB when debugging + hr = ReturnToRunCodeByExceptionLevelChange(m_thread_id, thread_params); } // Critical section for thread cleanup diff --git 
a/src/core/hle/service/sockets/sfdnsres.cpp b/src/core/hle/service/sockets/sfdnsres.cpp index 034a4b013c..c657c4efd8 100644 --- a/src/core/hle/service/sockets/sfdnsres.cpp +++ b/src/core/hle/service/sockets/sfdnsres.cpp @@ -151,12 +151,7 @@ static std::pair GetHostByNameRequestImpl(HLERequestConte // For now, ignore options, which are in input buffer 1 for GetHostByNameRequestWithOptions. // Prevent resolution of Nintendo servers - if (host.find("srv.nintendo.net") != std::string::npos || - host.find("battle.net") != std::string::npos || - host.find("microsoft.com") != std::string::npos || - host.find("mojang.com") != std::string::npos || - host.find("xboxlive.com") != std::string::npos || - host.find("minecraftservices.com") != std::string::npos) { + if (host.find("srv.nintendo.net") != std::string::npos) { LOG_WARNING(Network, "Resolution of hostname {} requested, returning EAI_AGAIN", host); return {0, GetAddrInfoError::AGAIN}; } @@ -273,12 +268,7 @@ static std::pair GetAddrInfoRequestImpl(HLERequestContext const std::string host = Common::StringFromBuffer(host_buffer); // Prevent resolution of Nintendo servers - if (host.find("srv.nintendo.net") != std::string::npos || - host.find("battle.net") != std::string::npos || - host.find("microsoft.com") != std::string::npos || - host.find("mojang.com") != std::string::npos || - host.find("xboxlive.com") != std::string::npos || - host.find("minecraftservices.com") != std::string::npos) { + if (host.find("srv.nintendo.net") != std::string::npos) { LOG_WARNING(Network, "Resolution of hostname {} requested, returning EAI_AGAIN", host); return {0, GetAddrInfoError::AGAIN}; } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/warp_shuffle.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/warp_shuffle.cpp index da2e31c012..f0436994b2 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/warp_shuffle.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/warp_shuffle.cpp @@ -4,7 
+4,6 @@ #include "common/bit_field.h" #include "common/common_types.h" #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" -#include namespace Shader::Maxwell { namespace { @@ -34,17 +33,6 @@ enum class ShuffleMode : u64 { } } -bool IsKONA() { - std::ifstream machineFile("/sys/devices/soc0/machine"); - if (machineFile.is_open()) { - std::string line; - std::getline(machineFile, line); - if (line == "KONA") - return true; - } - return false; -} - void Shuffle(TranslatorVisitor& v, u64 insn, const IR::U32& index, const IR::U32& mask) { union { u64 insn; @@ -56,10 +44,7 @@ void Shuffle(TranslatorVisitor& v, u64 insn, const IR::U32& index, const IR::U32 const IR::U32 result{ShuffleOperation(v.ir, v.X(shfl.src_reg), index, mask, shfl.mode)}; v.ir.SetPred(shfl.pred, v.ir.GetInBoundsFromOp(result)); - if (IsKONA()) - v.X(shfl.dest_reg, v.ir.Imm32(0xffffffff)); // This fixes the freeze for Retroid / Snapdragon SD865 - else - v.X(shfl.dest_reg, result); + v.X(shfl.dest_reg, result); } } // Anonymous namespace diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp index 44ffa2b578..ef1ef63500 100644 --- a/src/shader_recompiler/ir_opt/texture_pass.cpp +++ b/src/shader_recompiler/ir_opt/texture_pass.cpp @@ -327,14 +327,30 @@ std::optional TryGetConstBuffer(const IR::Inst* inst, Environme }; } +// TODO:xbzk: shall be dropped when Track method cover all bindless stuff +static ConstBufferAddr last_valid_addr = ConstBufferAddr{ + .index = 0, + .offset = 0, + .shift_left = 0, + .secondary_index = 0, + .secondary_offset = 0, + .secondary_shift_left = 0, + .dynamic_offset = {}, + .count = 1, + .has_secondary = false, +}; + TextureInst MakeInst(Environment& env, IR::Block* block, IR::Inst& inst) { - ConstBufferAddr addr{}; + ConstBufferAddr addr; if (IsBindless(inst)) { const std::optional track_addr{Track(inst.Arg(0), env)}; + if (!track_addr) { - LOG_WARNING(Shader, "Failed to track bindless texture constant 
buffer"); + //throw NotImplementedException("Failed to track bindless texture constant buffer"); + addr = last_valid_addr; // TODO:xbzk: shall be dropped when Track method cover all bindless stuff } else { addr = *track_addr; + last_valid_addr = addr; // TODO:xbzk: shall be dropped when Track method cover all bindless stuff } } else { addr = ConstBufferAddr{ diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index c7e287553e..af237703d5 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -50,7 +50,7 @@ BufferCache

::~BufferCache() = default; template void BufferCache

::RunGarbageCollector() { const bool aggressive_gc = total_used_memory >= critical_memory; - const u64 ticks_to_destroy = aggressive_gc ? 60 : 150; + const u64 ticks_to_destroy = aggressive_gc ? 60 : 120; int num_iterations = aggressive_gc ? 64 : 32; const auto clean_up = [this, &num_iterations](BufferId buffer_id) { if (num_iterations == 0) { @@ -1380,9 +1380,6 @@ void BufferCache

::JoinOverlap(BufferId new_buffer_id, BufferId overlap_id, }); new_buffer.MarkUsage(copies[0].dst_offset, copies[0].size); runtime.CopyBuffer(new_buffer, overlap, copies, true); -#ifdef ANDROID - runtime.Finish(); -#endif DeleteBuffer(overlap_id, true); } @@ -1674,12 +1671,7 @@ void BufferCache

::DeleteBuffer(BufferId buffer_id, bool do_not_mark) { } Unregister(buffer_id); - -#ifdef ANDROID - if (!do_not_mark) -#endif delayed_destruction_ring.Push(std::move(slot_buffers[buffer_id])); - slot_buffers.erase(buffer_id); if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { diff --git a/src/video_core/buffer_cache/buffer_cache_base.h b/src/video_core/buffer_cache/buffer_cache_base.h index 3a40bbad19..240e9f0150 100644 --- a/src/video_core/buffer_cache/buffer_cache_base.h +++ b/src/video_core/buffer_cache/buffer_cache_base.h @@ -156,11 +156,7 @@ template class BufferCache : public VideoCommon::ChannelSetupCaches { // Page size for caching purposes. // This is unrelated to the CPU page size and it can be changed as it seems optimal. -#ifdef ANDROID - static constexpr u32 CACHING_PAGEBITS = 12; -#else static constexpr u32 CACHING_PAGEBITS = 16; -#endif static constexpr u64 CACHING_PAGESIZE = u64{1} << CACHING_PAGEBITS; static constexpr bool IS_OPENGL = P::IS_OPENGL; @@ -174,15 +170,9 @@ class BufferCache : public VideoCommon::ChannelSetupCaches slot_buffers; -#ifdef ANDROID - static constexpr size_t TICKS_TO_DESTROY = 2; -#else - static constexpr size_t TICKS_TO_DESTROY = 8; -#endif - DelayedDestructionRing delayed_destruction_ring; + DelayedDestructionRing delayed_destruction_ring; const Tegra::Engines::DrawManager::IndirectParams* current_draw_indirect{}; diff --git a/src/video_core/cdma_pusher.cpp b/src/video_core/cdma_pusher.cpp index aa501ae51b..3bcf1b0664 100644 --- a/src/video_core/cdma_pusher.cpp +++ b/src/video_core/cdma_pusher.cpp @@ -20,9 +20,6 @@ CDmaPusher::CDmaPusher(Host1x::Host1x& host1x_, s32 id) : host1x{host1x_}, memory_manager{host1x.GMMU()}, host_processor{std::make_unique(host1x_)}, current_class{ static_cast(id)} { -#ifdef ANDROID - std::this_thread::sleep_for(std::chrono::milliseconds{500}); // HACK: Fix for Astroneer - doesn't always start without this delay. 
Happens on Windows too (but rarer) -#endif thread = std::jthread([this](std::stop_token stop_token) { ProcessEntries(stop_token); }); } diff --git a/src/video_core/delayed_destruction_ring.h b/src/video_core/delayed_destruction_ring.h index 49d7842f8e..d13ee622b2 100644 --- a/src/video_core/delayed_destruction_ring.h +++ b/src/video_core/delayed_destruction_ring.h @@ -22,6 +22,7 @@ public: void Push(T&& object) { elements[index].push_back(std::move(object)); } + private: size_t index = 0; std::array, TICKS_TO_DESTROY> elements; diff --git a/src/video_core/host1x/ffmpeg/ffmpeg.cpp b/src/video_core/host1x/ffmpeg/ffmpeg.cpp index 4bca9ab7b1..ffde231aee 100644 --- a/src/video_core/host1x/ffmpeg/ffmpeg.cpp +++ b/src/video_core/host1x/ffmpeg/ffmpeg.cpp @@ -237,23 +237,16 @@ std::shared_ptr DecoderContext::ReceiveFrame() { if (m_codec_context->hw_device_ctx) { // If we have a hardware context, make a separate frame here to receive the // hardware result before sending it to the output. - std::shared_ptr intermediate_frame = std::make_shared(); + Frame intermediate_frame; - if (!receive(intermediate_frame->GetFrame())) { + if (!receive(intermediate_frame.GetFrame())) { return {}; } - const auto fmt = intermediate_frame->GetPixelFormat(); - const auto desc = av_pix_fmt_desc_get(fmt); - if (desc && (desc->flags & AV_PIX_FMT_FLAG_HWACCEL)) { - m_temp_frame->SetFormat(PreferredGpuFormat); - if (int ret = av_hwframe_transfer_data(m_temp_frame->GetFrame(), intermediate_frame->GetFrame(), 0); ret < 0) { - LOG_ERROR(HW_GPU, "av_hwframe_transfer_data error: {}", AVError(ret)); - return {}; - } - } - else { - m_temp_frame = std::move(intermediate_frame); + m_temp_frame->SetFormat(PreferredGpuFormat); + if (int ret = av_hwframe_transfer_data(m_temp_frame->GetFrame(), intermediate_frame.GetFrame(), 0); ret < 0) { + LOG_ERROR(HW_GPU, "av_hwframe_transfer_data error: {}", AVError(ret)); + return {}; } } else { // Otherwise, decode the frame as normal. 
diff --git a/src/video_core/host1x/ffmpeg/ffmpeg.h b/src/video_core/host1x/ffmpeg/ffmpeg.h index 9fe0b1532a..28f1742b7e 100644 --- a/src/video_core/host1x/ffmpeg/ffmpeg.h +++ b/src/video_core/host1x/ffmpeg/ffmpeg.h @@ -21,7 +21,6 @@ extern "C" { #include #include -#include #include #if defined(__GNUC__) || defined(__clang__) diff --git a/src/video_core/renderer_vulkan/present/layer.cpp b/src/video_core/renderer_vulkan/present/layer.cpp index 984466f687..4e41afe5b4 100644 --- a/src/video_core/renderer_vulkan/present/layer.cpp +++ b/src/video_core/renderer_vulkan/present/layer.cpp @@ -137,7 +137,6 @@ void Layer::CreateStagingBuffer(const Tegra::FramebufferConfig& framebuffer) { .pQueueFamilyIndices = nullptr, }; - buffer.reset(); buffer = memory_allocator.CreateBuffer(ci, MemoryUsage::Upload); } diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 3178866128..feb1c575eb 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -78,9 +78,6 @@ void TextureCache

::RunGarbageCollector() { size_t num_iterations = 0; const auto Configure = [&](bool allow_aggressive) { -#ifdef ANDROID - high_priority_mode = true; -#endif high_priority_mode = total_used_memory >= expected_memory; aggressive_mode = allow_aggressive && total_used_memory >= critical_memory; ticks_to_destroy = aggressive_mode ? 10ULL : high_priority_mode ? 25ULL : 50ULL; diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index ba38182169..da98a634b5 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h @@ -107,17 +107,10 @@ class TextureCache : public VideoCommon::ChannelSetupCaches::max()}; - #ifdef ANDROID - static constexpr s64 TARGET_THRESHOLD = 3_GiB; - static constexpr s64 DEFAULT_EXPECTED_MEMORY = 1_GiB + 125_MiB; - static constexpr s64 DEFAULT_CRITICAL_MEMORY = 1_GiB + 625_MiB; - static constexpr size_t GC_EMERGENCY_COUNTS = 2; - #else static constexpr s64 TARGET_THRESHOLD = 4_GiB; static constexpr s64 DEFAULT_EXPECTED_MEMORY = 1_GiB + 125_MiB; static constexpr s64 DEFAULT_CRITICAL_MEMORY = 1_GiB + 625_MiB; static constexpr size_t GC_EMERGENCY_COUNTS = 2; - #endif using Runtime = typename P::Runtime; using Image = typename P::Image; @@ -483,11 +476,7 @@ private: }; Common::LeastRecentlyUsedCache lru_cache; - #ifdef ANDROID - static constexpr size_t TICKS_TO_DESTROY = 2; - #else static constexpr size_t TICKS_TO_DESTROY = 8; -#endif DelayedDestructionRing sentenced_images; DelayedDestructionRing sentenced_image_view; DelayedDestructionRing sentenced_framebuffers; diff --git a/src/video_core/vulkan_common/vulkan_memory_allocator.cpp b/src/video_core/vulkan_common/vulkan_memory_allocator.cpp index e35dea90d6..54331688e3 100644 --- a/src/video_core/vulkan_common/vulkan_memory_allocator.cpp +++ b/src/video_core/vulkan_common/vulkan_memory_allocator.cpp @@ -268,10 +268,7 @@ vk::Buffer MemoryAllocator::CreateBuffer(const 
VkBufferCreateInfo& ci, MemoryUsa VmaAllocation allocation{}; VkMemoryPropertyFlags property_flags{}; - VkResult result = vmaCreateBuffer(allocator, &ci, &alloc_ci, &handle, &allocation, &alloc_info); - if (result == VK_ERROR_OUT_OF_DEVICE_MEMORY) { - LOG_ERROR(Render_Vulkan, "Out of memory creating buffer (size: {})", ci.size); - } + vk::Check(vmaCreateBuffer(allocator, &ci, &alloc_ci, &handle, &allocation, &alloc_info)); vmaGetAllocationMemoryProperties(allocator, allocation, &property_flags); u8* data = reinterpret_cast(alloc_info.pMappedData);