mirror of
https://git.eden-emu.dev/eden-emu/eden.git
synced 2025-07-20 03:35:46 +00:00
revert 91a662431c
(#240)
Revert "[Texture_cache] Better memory handling for devices with lower memory allocations (#233)". Original description of the reverted commit: This means games like Minecraft Dungeons, Sea of Stars, Luigi's Mansion 2, Astroneer, Alan Wake, etc. are now playable. It also cleans up the recent abi.cpp and bindless texture commits a bit. Everything is in #ifdef ANDROID. The biggest change is CACHING_PAGEBITS = 12. Without that, the way the buffer cache grows and joins buffers can cause Android to run out of memory (as you end up with just one big buffer that needs to be copied every time it grows). It also patches up ffmpeg issues. Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/233 Co-authored-by: JPikachu <jpikachu.eden@gmail.com> Co-committed-by: JPikachu <jpikachu.eden@gmail.com> Reason for the revert: the change showed some regressions on devices with higher specifications; it will be refined and return as a toggle in a later commit. Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/240
This commit is contained in:
parent
09194cc5c3
commit
fa600b88b1
15 changed files with 53 additions and 96 deletions
|
@ -116,13 +116,18 @@ void ABI_PopCallerSaveRegistersAndAdjustStack(BlockOfCode& code, const std::size
|
|||
ABI_PopRegistersAndAdjustStack(code, frame_size, ABI_ALL_CALLER_SAVE);
|
||||
}
|
||||
|
||||
static std::vector<HostLoc> ABI_AllCallerSaveExcept(const std::size_t except) noexcept {
|
||||
std::vector<HostLoc> arr;
|
||||
std::remove_copy(ABI_ALL_CALLER_SAVE.begin(), ABI_ALL_CALLER_SAVE.end(), std::back_inserter(arr), static_cast<HostLoc>(except));
|
||||
static consteval size_t ABI_AllCallerSaveSize() noexcept {
|
||||
return ABI_ALL_CALLER_SAVE.max_size();
|
||||
}
|
||||
static consteval std::array<HostLoc, ABI_AllCallerSaveSize() - 1> ABI_AllCallerSaveExcept(const std::size_t except) noexcept {
|
||||
std::array<HostLoc, ABI_AllCallerSaveSize() - 1> arr;
|
||||
for(std::size_t i = 0; i < arr.size(); ++i) {
|
||||
arr[i] = static_cast<HostLoc>(i + (i >= except ? 1 : 0));
|
||||
}
|
||||
return arr;
|
||||
}
|
||||
|
||||
alignas(64) static std::vector<HostLoc> ABI_CALLER_SAVED_EXCEPT_TABLE[32] = {
|
||||
alignas(64) static constinit std::array<HostLoc, ABI_AllCallerSaveSize() - 1> ABI_CALLER_SAVED_EXCEPT_TABLE[32] = {
|
||||
ABI_AllCallerSaveExcept(0),
|
||||
ABI_AllCallerSaveExcept(1),
|
||||
ABI_AllCallerSaveExcept(2),
|
||||
|
@ -158,12 +163,24 @@ alignas(64) static std::vector<HostLoc> ABI_CALLER_SAVED_EXCEPT_TABLE[32] = {
|
|||
};
|
||||
|
||||
void ABI_PushCallerSaveRegistersAndAdjustStackExcept(BlockOfCode& code, const HostLoc exception) {
|
||||
#ifdef _MSC_VER
|
||||
std::vector<HostLoc> regs;
|
||||
std::remove_copy(ABI_ALL_CALLER_SAVE.begin(), ABI_ALL_CALLER_SAVE.end(), std::back_inserter(regs), exception);
|
||||
ABI_PushRegistersAndAdjustStack(code, 0, regs);
|
||||
#else
|
||||
ASSUME(size_t(exception) < 32);
|
||||
ABI_PushRegistersAndAdjustStack(code, 0, ABI_CALLER_SAVED_EXCEPT_TABLE[size_t(exception)]);
|
||||
#endif
|
||||
}
|
||||
|
||||
void ABI_PopCallerSaveRegistersAndAdjustStackExcept(BlockOfCode& code, const HostLoc exception) {
|
||||
#ifdef _MSC_VER
|
||||
std::vector<HostLoc> regs;
|
||||
std::remove_copy(ABI_ALL_CALLER_SAVE.begin(), ABI_ALL_CALLER_SAVE.end(), std::back_inserter(regs), exception);
|
||||
ABI_PopRegistersAndAdjustStack(code, 0, regs);
|
||||
#else
|
||||
ASSUME(size_t(exception) < 32);
|
||||
ABI_PopRegistersAndAdjustStack(code, 0, ABI_CALLER_SAVED_EXCEPT_TABLE[size_t(exception)]);
|
||||
#endif
|
||||
}
|
||||
} // namespace Dynarmic::Backend::X64
|
||||
|
|
|
@ -224,7 +224,7 @@ HaltReason ArmNce::RunThread(Kernel::KThread* thread) {
|
|||
if (auto it = post_handlers.find(m_guest_ctx.pc); it != post_handlers.end()) {
|
||||
hr = ReturnToRunCodeByTrampoline(thread_params, &m_guest_ctx, it->second);
|
||||
} else {
|
||||
hr = ReturnToRunCodeByExceptionLevelChange(m_thread_id, thread_params); // Android: Use "process handle SIGUSR2 -n true -p true -s false" (and SIGURG) in LLDB when debugging
|
||||
hr = ReturnToRunCodeByExceptionLevelChange(m_thread_id, thread_params);
|
||||
}
|
||||
|
||||
// Critical section for thread cleanup
|
||||
|
|
|
@ -151,12 +151,7 @@ static std::pair<u32, GetAddrInfoError> GetHostByNameRequestImpl(HLERequestConte
|
|||
// For now, ignore options, which are in input buffer 1 for GetHostByNameRequestWithOptions.
|
||||
|
||||
// Prevent resolution of Nintendo servers
|
||||
if (host.find("srv.nintendo.net") != std::string::npos ||
|
||||
host.find("battle.net") != std::string::npos ||
|
||||
host.find("microsoft.com") != std::string::npos ||
|
||||
host.find("mojang.com") != std::string::npos ||
|
||||
host.find("xboxlive.com") != std::string::npos ||
|
||||
host.find("minecraftservices.com") != std::string::npos) {
|
||||
if (host.find("srv.nintendo.net") != std::string::npos) {
|
||||
LOG_WARNING(Network, "Resolution of hostname {} requested, returning EAI_AGAIN", host);
|
||||
return {0, GetAddrInfoError::AGAIN};
|
||||
}
|
||||
|
@ -273,12 +268,7 @@ static std::pair<u32, GetAddrInfoError> GetAddrInfoRequestImpl(HLERequestContext
|
|||
const std::string host = Common::StringFromBuffer(host_buffer);
|
||||
|
||||
// Prevent resolution of Nintendo servers
|
||||
if (host.find("srv.nintendo.net") != std::string::npos ||
|
||||
host.find("battle.net") != std::string::npos ||
|
||||
host.find("microsoft.com") != std::string::npos ||
|
||||
host.find("mojang.com") != std::string::npos ||
|
||||
host.find("xboxlive.com") != std::string::npos ||
|
||||
host.find("minecraftservices.com") != std::string::npos) {
|
||||
if (host.find("srv.nintendo.net") != std::string::npos) {
|
||||
LOG_WARNING(Network, "Resolution of hostname {} requested, returning EAI_AGAIN", host);
|
||||
return {0, GetAddrInfoError::AGAIN};
|
||||
}
|
||||
|
|
|
@ -4,7 +4,6 @@
|
|||
#include "common/bit_field.h"
|
||||
#include "common/common_types.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
|
||||
#include <fstream>
|
||||
|
||||
namespace Shader::Maxwell {
|
||||
namespace {
|
||||
|
@ -34,17 +33,6 @@ enum class ShuffleMode : u64 {
|
|||
}
|
||||
}
|
||||
|
||||
bool IsKONA() {
|
||||
std::ifstream machineFile("/sys/devices/soc0/machine");
|
||||
if (machineFile.is_open()) {
|
||||
std::string line;
|
||||
std::getline(machineFile, line);
|
||||
if (line == "KONA")
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
void Shuffle(TranslatorVisitor& v, u64 insn, const IR::U32& index, const IR::U32& mask) {
|
||||
union {
|
||||
u64 insn;
|
||||
|
@ -56,10 +44,7 @@ void Shuffle(TranslatorVisitor& v, u64 insn, const IR::U32& index, const IR::U32
|
|||
|
||||
const IR::U32 result{ShuffleOperation(v.ir, v.X(shfl.src_reg), index, mask, shfl.mode)};
|
||||
v.ir.SetPred(shfl.pred, v.ir.GetInBoundsFromOp(result));
|
||||
if (IsKONA())
|
||||
v.X(shfl.dest_reg, v.ir.Imm32(0xffffffff)); // This fixes the freeze for Retroid / Snapdragon SD865
|
||||
else
|
||||
v.X(shfl.dest_reg, result);
|
||||
v.X(shfl.dest_reg, result);
|
||||
}
|
||||
} // Anonymous namespace
|
||||
|
||||
|
|
|
@ -327,14 +327,30 @@ std::optional<ConstBufferAddr> TryGetConstBuffer(const IR::Inst* inst, Environme
|
|||
};
|
||||
}
|
||||
|
||||
// TODO:xbzk: shall be dropped when Track method cover all bindless stuff
|
||||
static ConstBufferAddr last_valid_addr = ConstBufferAddr{
|
||||
.index = 0,
|
||||
.offset = 0,
|
||||
.shift_left = 0,
|
||||
.secondary_index = 0,
|
||||
.secondary_offset = 0,
|
||||
.secondary_shift_left = 0,
|
||||
.dynamic_offset = {},
|
||||
.count = 1,
|
||||
.has_secondary = false,
|
||||
};
|
||||
|
||||
TextureInst MakeInst(Environment& env, IR::Block* block, IR::Inst& inst) {
|
||||
ConstBufferAddr addr{};
|
||||
ConstBufferAddr addr;
|
||||
if (IsBindless(inst)) {
|
||||
const std::optional<ConstBufferAddr> track_addr{Track(inst.Arg(0), env)};
|
||||
|
||||
if (!track_addr) {
|
||||
LOG_WARNING(Shader, "Failed to track bindless texture constant buffer");
|
||||
//throw NotImplementedException("Failed to track bindless texture constant buffer");
|
||||
addr = last_valid_addr; // TODO:xbzk: shall be dropped when Track method cover all bindless stuff
|
||||
} else {
|
||||
addr = *track_addr;
|
||||
last_valid_addr = addr; // TODO:xbzk: shall be dropped when Track method cover all bindless stuff
|
||||
}
|
||||
} else {
|
||||
addr = ConstBufferAddr{
|
||||
|
|
|
@ -50,7 +50,7 @@ BufferCache<P>::~BufferCache() = default;
|
|||
template <class P>
|
||||
void BufferCache<P>::RunGarbageCollector() {
|
||||
const bool aggressive_gc = total_used_memory >= critical_memory;
|
||||
const u64 ticks_to_destroy = aggressive_gc ? 60 : 150;
|
||||
const u64 ticks_to_destroy = aggressive_gc ? 60 : 120;
|
||||
int num_iterations = aggressive_gc ? 64 : 32;
|
||||
const auto clean_up = [this, &num_iterations](BufferId buffer_id) {
|
||||
if (num_iterations == 0) {
|
||||
|
@ -1380,9 +1380,6 @@ void BufferCache<P>::JoinOverlap(BufferId new_buffer_id, BufferId overlap_id,
|
|||
});
|
||||
new_buffer.MarkUsage(copies[0].dst_offset, copies[0].size);
|
||||
runtime.CopyBuffer(new_buffer, overlap, copies, true);
|
||||
#ifdef ANDROID
|
||||
runtime.Finish();
|
||||
#endif
|
||||
DeleteBuffer(overlap_id, true);
|
||||
}
|
||||
|
||||
|
@ -1674,12 +1671,7 @@ void BufferCache<P>::DeleteBuffer(BufferId buffer_id, bool do_not_mark) {
|
|||
}
|
||||
|
||||
Unregister(buffer_id);
|
||||
|
||||
#ifdef ANDROID
|
||||
if (!do_not_mark)
|
||||
#endif
|
||||
delayed_destruction_ring.Push(std::move(slot_buffers[buffer_id]));
|
||||
|
||||
slot_buffers.erase(buffer_id);
|
||||
|
||||
if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
|
||||
|
|
|
@ -156,11 +156,7 @@ template <class P>
|
|||
class BufferCache : public VideoCommon::ChannelSetupCaches<BufferCacheChannelInfo> {
|
||||
// Page size for caching purposes.
|
||||
// This is unrelated to the CPU page size and it can be changed as it seems optimal.
|
||||
#ifdef ANDROID
|
||||
static constexpr u32 CACHING_PAGEBITS = 12;
|
||||
#else
|
||||
static constexpr u32 CACHING_PAGEBITS = 16;
|
||||
#endif
|
||||
static constexpr u64 CACHING_PAGESIZE = u64{1} << CACHING_PAGEBITS;
|
||||
|
||||
static constexpr bool IS_OPENGL = P::IS_OPENGL;
|
||||
|
@ -174,15 +170,9 @@ class BufferCache : public VideoCommon::ChannelSetupCaches<BufferCacheChannelInf
|
|||
static constexpr bool SEPARATE_IMAGE_BUFFERS_BINDINGS = P::SEPARATE_IMAGE_BUFFER_BINDINGS;
|
||||
static constexpr bool USE_MEMORY_MAPS_FOR_UPLOADS = P::USE_MEMORY_MAPS_FOR_UPLOADS;
|
||||
|
||||
#ifdef ANDROID
|
||||
static constexpr s64 DEFAULT_EXPECTED_MEMORY = 512_MiB;
|
||||
static constexpr s64 DEFAULT_CRITICAL_MEMORY = 1_GiB;
|
||||
static constexpr s64 TARGET_THRESHOLD = 3_GiB;
|
||||
#else
|
||||
static constexpr s64 DEFAULT_EXPECTED_MEMORY = 512_MiB;
|
||||
static constexpr s64 DEFAULT_CRITICAL_MEMORY = 1_GiB;
|
||||
static constexpr s64 TARGET_THRESHOLD = 4_GiB;
|
||||
#endif
|
||||
|
||||
// Debug Flags.
|
||||
|
||||
|
@ -458,12 +448,7 @@ private:
|
|||
Tegra::MaxwellDeviceMemoryManager& device_memory;
|
||||
|
||||
Common::SlotVector<Buffer> slot_buffers;
|
||||
#ifdef ANDROID
|
||||
static constexpr size_t TICKS_TO_DESTROY = 2;
|
||||
#else
|
||||
static constexpr size_t TICKS_TO_DESTROY = 8;
|
||||
#endif
|
||||
DelayedDestructionRing<Buffer, TICKS_TO_DESTROY> delayed_destruction_ring;
|
||||
DelayedDestructionRing<Buffer, 8> delayed_destruction_ring;
|
||||
|
||||
const Tegra::Engines::DrawManager::IndirectParams* current_draw_indirect{};
|
||||
|
||||
|
|
|
@ -20,9 +20,6 @@ CDmaPusher::CDmaPusher(Host1x::Host1x& host1x_, s32 id)
|
|||
: host1x{host1x_}, memory_manager{host1x.GMMU()},
|
||||
host_processor{std::make_unique<Host1x::Control>(host1x_)}, current_class{
|
||||
static_cast<ChClassId>(id)} {
|
||||
#ifdef ANDROID
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds{500}); // HACK: Fix for Astroneer - doesn't always start without this delay. Happens on Windows too (but rarer)
|
||||
#endif
|
||||
thread = std::jthread([this](std::stop_token stop_token) { ProcessEntries(stop_token); });
|
||||
}
|
||||
|
||||
|
|
|
@ -22,6 +22,7 @@ public:
|
|||
void Push(T&& object) {
|
||||
elements[index].push_back(std::move(object));
|
||||
}
|
||||
|
||||
private:
|
||||
size_t index = 0;
|
||||
std::array<std::vector<T>, TICKS_TO_DESTROY> elements;
|
||||
|
|
|
@ -237,23 +237,16 @@ std::shared_ptr<Frame> DecoderContext::ReceiveFrame() {
|
|||
if (m_codec_context->hw_device_ctx) {
|
||||
// If we have a hardware context, make a separate frame here to receive the
|
||||
// hardware result before sending it to the output.
|
||||
std::shared_ptr<Frame> intermediate_frame = std::make_shared<Frame>();
|
||||
Frame intermediate_frame;
|
||||
|
||||
if (!receive(intermediate_frame->GetFrame())) {
|
||||
if (!receive(intermediate_frame.GetFrame())) {
|
||||
return {};
|
||||
}
|
||||
|
||||
const auto fmt = intermediate_frame->GetPixelFormat();
|
||||
const auto desc = av_pix_fmt_desc_get(fmt);
|
||||
if (desc && (desc->flags & AV_PIX_FMT_FLAG_HWACCEL)) {
|
||||
m_temp_frame->SetFormat(PreferredGpuFormat);
|
||||
if (int ret = av_hwframe_transfer_data(m_temp_frame->GetFrame(), intermediate_frame->GetFrame(), 0); ret < 0) {
|
||||
LOG_ERROR(HW_GPU, "av_hwframe_transfer_data error: {}", AVError(ret));
|
||||
return {};
|
||||
}
|
||||
}
|
||||
else {
|
||||
m_temp_frame = std::move(intermediate_frame);
|
||||
m_temp_frame->SetFormat(PreferredGpuFormat);
|
||||
if (int ret = av_hwframe_transfer_data(m_temp_frame->GetFrame(), intermediate_frame.GetFrame(), 0); ret < 0) {
|
||||
LOG_ERROR(HW_GPU, "av_hwframe_transfer_data error: {}", AVError(ret));
|
||||
return {};
|
||||
}
|
||||
} else {
|
||||
// Otherwise, decode the frame as normal.
|
||||
|
|
|
@ -21,7 +21,6 @@ extern "C" {
|
|||
|
||||
#include <libavcodec/avcodec.h>
|
||||
#include <libavutil/opt.h>
|
||||
#include <libavutil/pixdesc.h>
|
||||
#include <libavcodec/codec_internal.h>
|
||||
|
||||
#if defined(__GNUC__) || defined(__clang__)
|
||||
|
|
|
@ -137,7 +137,6 @@ void Layer::CreateStagingBuffer(const Tegra::FramebufferConfig& framebuffer) {
|
|||
.pQueueFamilyIndices = nullptr,
|
||||
};
|
||||
|
||||
buffer.reset();
|
||||
buffer = memory_allocator.CreateBuffer(ci, MemoryUsage::Upload);
|
||||
}
|
||||
|
||||
|
|
|
@ -78,9 +78,6 @@ void TextureCache<P>::RunGarbageCollector() {
|
|||
size_t num_iterations = 0;
|
||||
|
||||
const auto Configure = [&](bool allow_aggressive) {
|
||||
#ifdef ANDROID
|
||||
high_priority_mode = true;
|
||||
#endif
|
||||
high_priority_mode = total_used_memory >= expected_memory;
|
||||
aggressive_mode = allow_aggressive && total_used_memory >= critical_memory;
|
||||
ticks_to_destroy = aggressive_mode ? 10ULL : high_priority_mode ? 25ULL : 50ULL;
|
||||
|
|
|
@ -107,17 +107,10 @@ class TextureCache : public VideoCommon::ChannelSetupCaches<TextureCacheChannelI
|
|||
|
||||
static constexpr size_t UNSET_CHANNEL{std::numeric_limits<size_t>::max()};
|
||||
|
||||
#ifdef ANDROID
|
||||
static constexpr s64 TARGET_THRESHOLD = 3_GiB;
|
||||
static constexpr s64 DEFAULT_EXPECTED_MEMORY = 1_GiB + 125_MiB;
|
||||
static constexpr s64 DEFAULT_CRITICAL_MEMORY = 1_GiB + 625_MiB;
|
||||
static constexpr size_t GC_EMERGENCY_COUNTS = 2;
|
||||
#else
|
||||
static constexpr s64 TARGET_THRESHOLD = 4_GiB;
|
||||
static constexpr s64 DEFAULT_EXPECTED_MEMORY = 1_GiB + 125_MiB;
|
||||
static constexpr s64 DEFAULT_CRITICAL_MEMORY = 1_GiB + 625_MiB;
|
||||
static constexpr size_t GC_EMERGENCY_COUNTS = 2;
|
||||
#endif
|
||||
|
||||
using Runtime = typename P::Runtime;
|
||||
using Image = typename P::Image;
|
||||
|
@ -483,11 +476,7 @@ private:
|
|||
};
|
||||
Common::LeastRecentlyUsedCache<LRUItemParams> lru_cache;
|
||||
|
||||
#ifdef ANDROID
|
||||
static constexpr size_t TICKS_TO_DESTROY = 2;
|
||||
#else
|
||||
static constexpr size_t TICKS_TO_DESTROY = 8;
|
||||
#endif
|
||||
DelayedDestructionRing<Image, TICKS_TO_DESTROY> sentenced_images;
|
||||
DelayedDestructionRing<ImageView, TICKS_TO_DESTROY> sentenced_image_view;
|
||||
DelayedDestructionRing<Framebuffer, TICKS_TO_DESTROY> sentenced_framebuffers;
|
||||
|
|
|
@ -268,10 +268,7 @@ vk::Buffer MemoryAllocator::CreateBuffer(const VkBufferCreateInfo& ci, MemoryUsa
|
|||
VmaAllocation allocation{};
|
||||
VkMemoryPropertyFlags property_flags{};
|
||||
|
||||
VkResult result = vmaCreateBuffer(allocator, &ci, &alloc_ci, &handle, &allocation, &alloc_info);
|
||||
if (result == VK_ERROR_OUT_OF_DEVICE_MEMORY) {
|
||||
LOG_ERROR(Render_Vulkan, "Out of memory creating buffer (size: {})", ci.size);
|
||||
}
|
||||
vk::Check(vmaCreateBuffer(allocator, &ci, &alloc_ci, &handle, &allocation, &alloc_info));
|
||||
vmaGetAllocationMemoryProperties(allocator, allocation, &property_flags);
|
||||
|
||||
u8* data = reinterpret_cast<u8*>(alloc_info.pMappedData);
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue