From 8fcd5c1e5a83a5159525cf8c723cd701ec8e26c0 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Thu, 10 May 2018 14:50:32 +0300 Subject: [PATCH] rsx: Texture cache fixes 1. rsx: Rework section synchronization using the new memory mirrors 2. rsx: Tweaks - Simplify peeking into the current rsx::thread instance. Use a simple rsx::get_current_renderer instead of asking fxm for the same - Fix global rsx super memory shm block management 3. rsx: Improve memory validation. test_framebuffer() and tag_framebuffer() are simplified due to mirror support 4. rsx: Only write back confirmed memory range to avoid overapproximation errors in blit engine 5. rsx: Explicitly mark clobbered flushable sections as dirty to have them removed 6. rsx: Cumulative fixes - Reimplement rsx::buffered_section management routines - blit engine subsections are not hit-tested against confirmed/committed memory range Not all applications are 'honest' about region bounds, making the real cpu range useless for blit ops --- rpcs3/Emu/Cell/Modules/cellGcmSys.cpp | 46 ++-- rpcs3/Emu/Cell/lv2/sys_rsx.cpp | 4 +- rpcs3/Emu/RSX/Common/texture_cache.h | 300 +++++++++++--------------- rpcs3/Emu/RSX/GL/GLTextureCache.h | 32 +-- rpcs3/Emu/RSX/Overlays/overlays.cpp | 2 +- rpcs3/Emu/RSX/RSXThread.cpp | 4 + rpcs3/Emu/RSX/RSXThread.h | 3 + rpcs3/Emu/RSX/VK/VKHelpers.cpp | 7 + rpcs3/Emu/RSX/VK/VKTextureCache.h | 69 +++--- rpcs3/Emu/RSX/rsx_cache.h | 212 +++++++++++++++--- rpcs3/Emu/RSX/rsx_utils.cpp | 44 ++++ rpcs3/Emu/RSX/rsx_utils.h | 49 +++++ rpcs3/emucore.vcxproj | 2 +- rpcs3/rpcs3qt/rsx_debugger.cpp | 18 +- 14 files changed, 512 insertions(+), 280 deletions(-) diff --git a/rpcs3/Emu/Cell/Modules/cellGcmSys.cpp b/rpcs3/Emu/Cell/Modules/cellGcmSys.cpp index 1e7ed43680..b3692c547e 100644 --- a/rpcs3/Emu/Cell/Modules/cellGcmSys.cpp +++ b/rpcs3/Emu/Cell/Modules/cellGcmSys.cpp @@ -275,7 +275,7 @@ s32 cellGcmBindTile(u8 index) return CELL_GCM_ERROR_INVALID_VALUE; } - fxm::get()->tiles[index].binded = true; + rsx::get_current_renderer()->tiles[index].binded = true; return CELL_OK; } @@ -291,7 +291,7 @@ s32 cellGcmBindZcull(u8 index, u32 offset, u32 width, u32 height, u32 cullStart, return CELL_GCM_ERROR_INVALID_VALUE; } - fxm::get()->zculls[index].binded = true; + rsx::get_current_renderer()->zculls[index].binded = true; return CELL_OK; } @@ -307,7 +307,7 @@ void cellGcmGetConfiguration(vm::ptr config) u32 cellGcmGetFlipStatus() { - u32 status = fxm::get()->flip_status; + u32 status = rsx::get_current_renderer()->flip_status; cellGcmSys.trace("cellGcmGetFlipStatus() -> %d", status); @@ -421,7 +421,7 @@ s32 _cellGcmInitBody(vm::pptr context, u32 cmdSize, u32 ioSi ctrl.get = 0; ctrl.ref = -1; - const auto render = fxm::get(); + const auto render = rsx::get_current_renderer(); render->intr_thread = idm::make_ptr("_gcm_intr_thread", 1, 0x4000); render->intr_thread->run(); render->main_mem_addr = 0; @@ -436,7 +436,7 @@ void cellGcmResetFlipStatus() { cellGcmSys.trace("cellGcmResetFlipStatus()"); - fxm::get()->flip_status = CELL_GCM_DISPLAY_FLIP_STATUS_WAITING; + rsx::get_current_renderer()->flip_status = CELL_GCM_DISPLAY_FLIP_STATUS_WAITING; } void cellGcmSetDebugOutputLevel(s32 level) @@ -448,7 +448,7 @@ void cellGcmSetDebugOutputLevel(s32 level) case CELL_GCM_DEBUG_LEVEL0: case CELL_GCM_DEBUG_LEVEL1: case CELL_GCM_DEBUG_LEVEL2: - fxm::get()->debug_level = level; + rsx::get_current_renderer()->debug_level = level; break; default: @@ -470,7 +470,7 @@ s32 cellGcmSetDisplayBuffer(u8 id, u32 offset, u32 pitch, u32 width, u32 height) return CELL_GCM_ERROR_FAILURE; } - const auto render = fxm::get(); + const auto render = rsx::get_current_renderer(); auto buffers = render->display_buffers; @@ -496,21 +496,21 @@ void cellGcmSetFlipHandler(vm::ptr handler) { cellGcmSys.warning("cellGcmSetFlipHandler(handler=*0x%x)", handler); - fxm::get()->flip_handler = handler; + rsx::get_current_renderer()->flip_handler = handler; } void cellGcmSetFlipMode(u32 mode) { cellGcmSys.warning("cellGcmSetFlipMode(mode=%d)", mode); - fxm::get()->requested_vsync.store(mode == CELL_GCM_DISPLAY_VSYNC); + rsx::get_current_renderer()->requested_vsync.store(mode == CELL_GCM_DISPLAY_VSYNC); } void cellGcmSetFlipStatus() { cellGcmSys.warning("cellGcmSetFlipStatus()"); - fxm::get()->flip_status = CELL_GCM_DISPLAY_FLIP_STATUS_DONE; + rsx::get_current_renderer()->flip_status = CELL_GCM_DISPLAY_FLIP_STATUS_DONE; } s32 cellGcmSetPrepareFlip(ppu_thread& ppu, vm::ptr ctxt, u32 id) @@ -561,7 +561,7 @@ void cellGcmSetSecondVFrequency(u32 freq) { cellGcmSys.warning("cellGcmSetSecondVFrequency(level=%d)", freq); - const auto render = fxm::get(); + const auto render = rsx::get_current_renderer(); switch (freq) { @@ -612,7 +612,7 @@ s32 cellGcmSetTileInfo(u8 index, u8 location, u32 offset, u32 size, u32 pitch, u cellGcmSys.error("cellGcmSetTileInfo: bad compression mode! (%d)", comp); } - const auto render = fxm::get(); + const auto render = rsx::get_current_renderer(); auto& tile = render->tiles[index]; tile.location = location; @@ -631,7 +631,7 @@ void cellGcmSetUserHandler(vm::ptr handler) { cellGcmSys.warning("cellGcmSetUserHandler(handler=*0x%x)", handler); - fxm::get()->user_handler = handler; + rsx::get_current_renderer()->user_handler = handler; } void cellGcmSetUserCommand(vm::ptr ctxt, u32 cause) @@ -643,7 +643,7 @@ void cellGcmSetVBlankHandler(vm::ptr handler) { cellGcmSys.warning("cellGcmSetVBlankHandler(handler=*0x%x)", handler); - fxm::get()->vblank_handler = handler; + rsx::get_current_renderer()->vblank_handler = handler; } void cellGcmSetWaitFlip(vm::ptr ctxt) @@ -675,7 +675,7 @@ void cellGcmSetZcull(u8 index, u32 offset, u32 width, u32 height, u32 cullStart, return; } - const auto render = fxm::get(); + const auto render = rsx::get_current_renderer(); auto& zcull = render->zculls[index]; zcull.offset = offset; @@ -703,7 +703,7 @@ s32 cellGcmUnbindTile(u8 index) return CELL_GCM_ERROR_INVALID_VALUE; } - fxm::get()->tiles[index].binded = false; + rsx::get_current_renderer()->tiles[index].binded = false; return CELL_OK; } @@ -718,7 +718,7 @@ s32 cellGcmUnbindZcull(u8 index) return CELL_GCM_ERROR_INVALID_VALUE; } - fxm::get()->zculls[index].binded = false; + rsx::get_current_renderer()->zculls[index].binded = false; return CELL_OK; } @@ -754,7 +754,7 @@ s32 cellGcmGetCurrentDisplayBufferId(vm::ptr id) { cellGcmSys.warning("cellGcmGetCurrentDisplayBufferId(id=*0x%x)", id); - if ((*id = fxm::get()->current_display_buffer) > UINT8_MAX) + if ((*id = rsx::get_current_renderer()->current_display_buffer) > UINT8_MAX) { fmt::throw_exception("Unexpected" HERE); } @@ -788,7 +788,7 @@ u64 cellGcmGetLastFlipTime() { cellGcmSys.trace("cellGcmGetLastFlipTime()"); - return fxm::get()->last_flip_time; + return rsx::get_current_renderer()->last_flip_time; } u64 cellGcmGetLastSecondVTime() @@ -801,7 +801,7 @@ u64 cellGcmGetVBlankCount() { cellGcmSys.trace("cellGcmGetVBlankCount()"); - return fxm::get()->vblank_count; + return rsx::get_current_renderer()->vblank_count; } s32 cellGcmSysGetLastVBlankTime() @@ -933,7 +933,7 @@ s32 gcmMapEaIoAddress(u32 ea, u32 io, u32 size, bool is_strict) { if ((ea & 0xFFFFF) || (io & 0xFFFFF) || (size & 0xFFFFF)) return CELL_GCM_ERROR_FAILURE; - const auto render = fxm::get(); + const auto render = rsx::get_current_renderer(); // Check if the mapping was successfull if (RSXIOMem.Map(ea, size, io)) @@ -997,7 +997,7 @@ s32 cellGcmMapMainMemory(u32 ea, u32 size, vm::ptr offset) u32 io = RSXIOMem.Map(ea, size); - const auto render = fxm::get(); + const auto render = rsx::get_current_renderer(); //check if the mapping was successfull if (RSXIOMem.RealAddr(io) == ea) @@ -1237,7 +1237,7 @@ s32 cellGcmSetTile(u8 index, u8 location, u32 offset, u32 size, u32 pitch, u8 co cellGcmSys.error("cellGcmSetTile: bad compression mode! (%d)", comp); } - const auto render = fxm::get(); + const auto render = rsx::get_current_renderer(); auto& tile = render->tiles[index]; tile.location = location; diff --git a/rpcs3/Emu/Cell/lv2/sys_rsx.cpp b/rpcs3/Emu/Cell/lv2/sys_rsx.cpp index a41ff71c58..edf165bc30 100644 --- a/rpcs3/Emu/Cell/lv2/sys_rsx.cpp +++ b/rpcs3/Emu/Cell/lv2/sys_rsx.cpp @@ -150,7 +150,7 @@ s32 sys_rsx_context_allocate(vm::ptr context_id, vm::ptr lpar_dma_cont m_sysrsx->rsx_event_port = queueId->value(); - const auto render = fxm::get(); + const auto render = rsx::get_current_renderer(); render->display_buffers_count = 0; render->current_display_buffer = 0; render->main_mem_addr = 0; @@ -222,7 +222,7 @@ s32 sys_rsx_context_attribute(s32 context_id, u32 package_id, u64 a3, u64 a4, u6 // todo: these event ports probly 'shouldnt' be here as i think its supposed to be interrupts that are sent from rsx somewhere in lv1 - const auto render = fxm::get(); + const auto render = rsx::get_current_renderer(); //hle protection if (render->isHLE) diff --git a/rpcs3/Emu/RSX/Common/texture_cache.h b/rpcs3/Emu/RSX/Common/texture_cache.h index 5c9181fd8d..5f031ec5d8 100644 --- a/rpcs3/Emu/RSX/Common/texture_cache.h +++ b/rpcs3/Emu/RSX/Common/texture_cache.h @@ -271,8 +271,7 @@ namespace rsx { bool violation_handled = false; std::vector sections_to_flush; //Sections to be flushed - std::vector sections_to_reprotect; //Sections to be protected after flushing - std::vector sections_to_unprotect; //These sections are to be unprotected and discarded by caller + std::vector sections_to_unprotect; //These sections are to be unpotected and discarded by caller int num_flushable = 0; u64 cache_tag = 0; u32 address_base = 0; @@ -465,7 +464,7 @@ namespace rsx { for (auto &tex : found->second.data) { - if (tex.is_locked() && tex.overlaps(address, false)) + if (tex.is_locked() && tex.overlaps(address, rsx::overlap_test_bounds::protected_range)) return{ tex.get_protection(), &tex }; } } @@ -476,7 +475,7 @@ namespace rsx { for (auto &tex : found->second.data) { - if (tex.is_locked() && tex.overlaps(address, false)) + if (tex.is_locked() && tex.overlaps(address, rsx::overlap_test_bounds::protected_range)) return{ tex.get_protection(), &tex }; } } @@ -541,7 +540,11 @@ namespace rsx if (tex.cache_tag == cache_tag) continue; //already processed if (!tex.is_locked()) continue; //flushable sections can be 'clean' but unlocked. TODO: Handle this better - auto overlapped = tex.overlaps_page(trampled_range, address, strict_range_check || tex.get_context() == rsx::texture_upload_context::blit_engine_dst); + const auto bounds_test = (strict_range_check || tex.get_context() == rsx::texture_upload_context::blit_engine_dst) ? + rsx::overlap_test_bounds::full_range : + rsx::overlap_test_bounds::protected_range; + + auto overlapped = tex.overlaps_page(trampled_range, address, bounds_test); if (std::get<0>(overlapped)) { auto &new_range = std::get<1>(overlapped); @@ -598,61 +601,94 @@ namespace rsx } } - std::vector reprotections; for (auto &obj : trampled_set) { - bool to_reprotect = false; - - if (!deferred_flush && !discard_only) + if (!discard_only) { - if (!is_writing && obj.first->get_protection() != utils::protection::no) + bool collateral = false; + if (!deferred_flush) { - to_reprotect = true; - } - else - { - if (rebuild_cache && allow_flush && obj.first->is_flushable()) + if (!is_writing && obj.first->get_protection() != utils::protection::no) { - const std::pair null_check = std::make_pair(UINT32_MAX, 0); - to_reprotect = !std::get<0>(obj.first->overlaps_page(null_check, address, true)); + collateral = true; + } + else + { + if (rebuild_cache && allow_flush && obj.first->is_flushable()) + { + const std::pair null_check = std::make_pair(UINT32_MAX, 0); + collateral = !std::get<0>(obj.first->overlaps_page(null_check, address, rsx::overlap_test_bounds::full_range)); + } } } + + if (collateral) + { + //False positive + continue; + } + else if (obj.first->is_flushable()) + { + //Write if and only if no one else has trashed section memory already + //TODO: Proper section management should prevent this from happening + //TODO: Blit engine section merge support and/or partial texture memory buffering + if (!obj.first->test_memory_head() || !obj.first->test_memory_tail()) + { + if (obj.first->get_memory_read_flags() == rsx::memory_read_flags::flush_always) + { + //Someone decided to overwrite memory specified as an active framebuffer + m_flush_always_cache.erase(obj.first->get_section_base()); + } + + //Contents clobbered, destroy this + obj.first->set_dirty(true); + m_unreleased_texture_objects++; + + result.sections_to_unprotect.push_back(obj.first); + } + else if (!allow_flush) + { + result.sections_to_flush.push_back(obj.first); + } + else + { + if (!obj.first->flush(std::forward(extras)...)) + { + //Missed address, note this + //TODO: Lower severity when successful to keep the cache from overworking + record_cache_miss(*obj.first); + } + + m_num_flush_requests++; + result.sections_to_unprotect.push_back(obj.first); + } + + continue; + } + else if (deferred_flush) + { + //allow_flush = false and not synchronized + result.sections_to_unprotect.push_back(obj.first); + continue; + } } - if (to_reprotect) - { - result.sections_to_reprotect.push_back(obj.first); - reprotections.push_back(obj.first->get_protection()); - } - else if (obj.first->is_flushable()) - { - result.sections_to_flush.push_back(obj.first); - } - else if (!deferred_flush) + if (!obj.first->is_flushable()) { obj.first->set_dirty(true); m_unreleased_texture_objects++; } - else - { - result.sections_to_unprotect.push_back(obj.first); - } - - if (deferred_flush) - continue; + //Only unsynchronized (no-flush) sections should reach here, and only if the rendering thread is the caller if (discard_only) obj.first->discard(); else obj.first->unprotect(); - if (!to_reprotect) - { - obj.second->remove_one(); - } + obj.second->remove_one(); } - if (deferred_flush) + if (deferred_flush && result.sections_to_flush.size()) { result.num_flushable = static_cast(result.sections_to_flush.size()); result.address_base = address; @@ -660,33 +696,16 @@ namespace rsx result.cache_tag = m_cache_update_tag.load(std::memory_order_consume); return result; } - - if (result.sections_to_flush.size() > 0) + else { - verify(HERE), allow_flush; - - // Flush here before 'reprotecting' since flushing will write the whole span - for (const auto &tex : result.sections_to_flush) + //Flushes happen in one go, now its time to remove protection + for (auto& section : result.sections_to_unprotect) { - if (!tex->flush(std::forward(extras)...)) - { - //Missed address, note this - //TODO: Lower severity when successful to keep the cache from overworking - record_cache_miss(*tex); - } - - m_num_flush_requests++; + section->unprotect(); + m_cache[get_block_address(section->get_section_base())].remove_one(); } } - int n = 0; - for (auto &tex: result.sections_to_reprotect) - { - tex->discard(); - tex->protect(reprotections[n++]); - tex->set_dirty(false); - } - //Everything has been handled result = {}; result.violation_handled = true; @@ -807,7 +826,7 @@ namespace rsx if (tex.get_section_base() > rsx_address) continue; - if (!tex.is_dirty() && tex.overlaps(test, true)) + if (!tex.is_dirty() && tex.overlaps(test, rsx::overlap_test_bounds::full_range)) results.push_back(&tex); } } @@ -1076,7 +1095,7 @@ namespace rsx if (tex.is_dirty()) continue; if (!tex.is_flushable()) continue; - if (tex.overlaps(address, false)) + if (tex.overlaps(address, rsx::overlap_test_bounds::protected_range)) return std::make_tuple(true, &tex); } } @@ -1100,7 +1119,7 @@ namespace rsx if (tex.is_dirty()) continue; if (!tex.is_flushable()) continue; - if (tex.overlaps(address, false)) + if (tex.overlaps(address, rsx::overlap_test_bounds::protected_range)) return std::make_tuple(true, &tex); } } @@ -1138,20 +1157,21 @@ namespace rsx if (m_cache_update_tag.load(std::memory_order_consume) == data.cache_tag) { - std::vector old_protections; - for (auto &tex : data.sections_to_reprotect) + //1. Write memory to cpu side + for (auto &tex : data.sections_to_flush) { if (tex->is_locked()) { - old_protections.push_back(tex->get_protection()); - tex->unprotect(); - } - else - { - old_protections.push_back(utils::protection::rw); + if (!tex->flush(std::forward(extras)...)) + { + record_cache_miss(*tex); + } + + m_num_flush_requests++; } } + //2. Release all obsolete sections for (auto &tex : data.sections_to_unprotect) { if (tex->is_locked()) @@ -1162,46 +1182,11 @@ namespace rsx } } - //TODO: This bit can cause race conditions if other threads are accessing this memory - //1. Force readback if surface is not synchronized yet to make unlocked part finish quickly - for (auto &tex : data.sections_to_flush) - { - if (tex->is_locked()) - { - if (!tex->is_synchronized()) - { - record_cache_miss(*tex); - tex->copy_texture(true, std::forward(extras)...); - } - - m_cache[get_block_address(tex->get_section_base())].remove_one(); - } - } - - //TODO: Acquire global io lock here - - //2. Unprotect all the memory + //3. Release all flushed sections for (auto &tex : data.sections_to_flush) { tex->unprotect(); - } - - //3. Write all the memory - for (auto &tex : data.sections_to_flush) - { - tex->flush(std::forward(extras)...); - m_num_flush_requests++; - } - - //Restore protection on the sections to reprotect - int n = 0; - for (auto &tex : data.sections_to_reprotect) - { - if (old_protections[n] != utils::protection::rw) - { - tex->discard(); - tex->protect(old_protections[n++]); - } + m_cache[get_block_address(tex->get_section_base())].remove_one(); } } else @@ -1781,7 +1766,7 @@ namespace rsx for (const auto &surface : overlapping_surfaces) { if (surface->get_context() != rsx::texture_upload_context::blit_engine_dst || - !surface->is_locked()) + !surface->overlaps(std::make_pair(texaddr, tex_size), rsx::overlap_test_bounds::confirmed_range)) continue; if (surface->get_width() >= tex_width && surface->get_height() >= tex_height) @@ -2251,30 +2236,41 @@ namespace rsx dst_is_argb8 ? rsx::texture_create_flags::default_component_order : rsx::texture_create_flags::swapped_native_component_order; - dest_texture = create_new_texture(cmd, dst.rsx_address, dst.pitch * dst_dimensions.height, + //NOTE: Should upload from cpu instead of creating a blank texture + cached_dest = create_new_texture(cmd, dst.rsx_address, dst.pitch * dst_dimensions.height, dst_dimensions.width, dst_dimensions.height, 1, 1, gcm_format, rsx::texture_upload_context::blit_engine_dst, rsx::texture_dimension_extended::texture_dimension_2d, - channel_order, rsx::texture_colorspace::rgb_linear, rsx::default_remap_vector)->get_raw_texture(); + channel_order, rsx::texture_colorspace::rgb_linear, rsx::default_remap_vector); + dest_texture = cached_dest->get_raw_texture(); m_texture_memory_in_use += dst.pitch * dst_dimensions.height; } - else if (cached_dest) - { - if (!cached_dest->is_locked()) - { - lock.upgrade(); - cached_dest->reprotect(utils::protection::no); + if (cached_dest) + { + const bool notify = !cached_dest->is_locked(); + const u32 mem_base = dst_area.y1 * dst.pitch; + const u32 mem_length = dst.pitch * dst.clip_height; + + lock.upgrade(); + + if (notify) + { m_cache[get_block_address(cached_dest->get_section_base())].notify(); } else if (cached_dest->is_synchronized()) { - //Prematurely read back + // Premature readback m_num_cache_mispredictions++; } + cached_dest->reprotect(utils::protection::no, { mem_base, mem_length }); cached_dest->touch(); } + else + { + verify(HERE), dst_is_render_target; + } if (rsx::get_resolution_scale_percent() != 100) { @@ -2345,9 +2341,17 @@ namespace rsx auto& section = find_cached_texture(It.first, It.second); if (section.get_protection() != utils::protection::no) { - //NOTE: find_cached_texture will increment block ctr - section.reprotect(utils::protection::no); - update_tag = true; + if (section.exists()) + { + //NOTE: find_cached_texture will increment block ctr + section.reprotect(utils::protection::no); + update_tag = true; + } + else + { + //This should never happen + LOG_ERROR(RSX, "Reprotection attempted on destroyed framebuffer section @ 0x%x+0x%x", It.first, It.second); + } } } @@ -2406,58 +2410,14 @@ namespace rsx void tag_framebuffer(u32 texaddr) { - if (!g_cfg.video.strict_rendering_mode) - return; - - writer_lock lock(m_cache_mutex); - - const auto protect_info = get_memory_protection(texaddr); - if (protect_info.first != utils::protection::rw) - { - if (protect_info.second->overlaps(texaddr, true)) - { - if (protect_info.first == utils::protection::no) - return; - - if (protect_info.second->get_context() != texture_upload_context::blit_engine_dst) - { - //TODO: Invalidate this section - LOG_TRACE(RSX, "Framebuffer memory occupied by regular texture!"); - } - } - - protect_info.second->unprotect(); - vm::write32(texaddr, texaddr); - protect_info.second->protect(protect_info.first); - return; - } - - vm::write32(texaddr, texaddr); + auto ptr = rsx::get_super_ptr(texaddr, 4).get(); + *ptr = texaddr; } bool test_framebuffer(u32 texaddr) { - if (!g_cfg.video.strict_rendering_mode) - return true; - - if (g_cfg.video.write_color_buffers || g_cfg.video.write_depth_buffer) - { - writer_lock lock(m_cache_mutex); - auto protect_info = get_memory_protection(texaddr); - if (protect_info.first == utils::protection::no) - { - if (protect_info.second->overlaps(texaddr, true)) - return true; - - //Address isnt actually covered by the region, it only shares a page with it - protect_info.second->unprotect(); - bool result = (vm::read32(texaddr) == texaddr); - protect_info.second->protect(utils::protection::no); - return result; - } - } - - return vm::read32(texaddr) == texaddr; + auto ptr = rsx::get_super_ptr(texaddr, 4).get(); + return *ptr == texaddr; } }; } diff --git a/rpcs3/Emu/RSX/GL/GLTextureCache.h b/rpcs3/Emu/RSX/GL/GLTextureCache.h index 96a75e34f8..3579cd634d 100644 --- a/rpcs3/Emu/RSX/GL/GLTextureCache.h +++ b/rpcs3/Emu/RSX/GL/GLTextureCache.h @@ -477,12 +477,14 @@ namespace gl m_fence.wait_for_signal(); flushed = true; + const auto valid_range = get_confirmed_range(); + void *dst = get_raw_ptr(valid_range.first); + glBindBuffer(GL_PIXEL_PACK_BUFFER, pbo_id); - void *data = glMapBufferRange(GL_PIXEL_PACK_BUFFER, 0, pbo_size, GL_MAP_READ_BIT); - u8 *dst = vm::_ptr(cpu_address_base); + void *src = glMapBufferRange(GL_PIXEL_PACK_BUFFER, valid_range.first, valid_range.second, GL_MAP_READ_BIT); //throw if map failed since we'll segfault anyway - verify(HERE), data != nullptr; + verify(HERE), src != nullptr; bool require_manual_shuffle = false; if (pack_unpack_swap_bytes) @@ -493,20 +495,17 @@ namespace gl if (real_pitch >= rsx_pitch || scaled_texture != 0) { - memcpy(dst, data, cpu_address_range); + memcpy(dst, src, valid_range.second); } else { - const u8 pixel_size = get_pixel_size(format, type); - const u8 samples_u = (aa_mode == rsx::surface_antialiasing::center_1_sample) ? 1 : 2; - const u8 samples_v = (aa_mode == rsx::surface_antialiasing::square_centered_4_samples || aa_mode == rsx::surface_antialiasing::square_rotated_4_samples) ? 2 : 1; - rsx::scale_image_nearest(dst, const_cast(data), width, height, rsx_pitch, real_pitch, pixel_size, samples_u, samples_v); + fmt::throw_exception("Unreachable"); } if (require_manual_shuffle) { //byte swapping does not work on byte types, use uint_8_8_8_8 for rgba8 instead to avoid penalty - rsx::shuffle_texel_data_wzyx(dst, rsx_pitch, width, height); + rsx::shuffle_texel_data_wzyx(dst, rsx_pitch, width, valid_range.second / rsx_pitch); } else if (pack_unpack_swap_bytes && ::gl::get_driver_caps().vendor_AMD) { @@ -522,7 +521,7 @@ namespace gl case texture::type::ushort_1_5_5_5_rev: case texture::type::ushort_5_5_5_1: { - const u32 num_reps = cpu_address_range / 2; + const u32 num_reps = valid_range.second / 2; be_t* in = (be_t*)(dst); u16* out = (u16*)dst; @@ -541,7 +540,7 @@ namespace gl case texture::type::uint_2_10_10_10_rev: case texture::type::uint_8_8_8_8: { - u32 num_reps = cpu_address_range / 4; + u32 num_reps = valid_range.second / 4; be_t* in = (be_t*)(dst); u32* out = (u32*)dst; @@ -568,6 +567,13 @@ namespace gl return result; } + void reprotect(utils::protection prot, const std::pair& range) + { + flushed = false; + synchronized = false; + protect(prot, range); + } + void reprotect(utils::protection prot) { flushed = false; @@ -992,9 +998,9 @@ namespace gl fmt::throw_exception("Unexpected gcm format 0x%X" HERE, gcm_format); } + //NOTE: Protection is handled by the caller cached.make_flushable(); cached.set_dimensions(width, height, depth, (rsx_size / height)); - cached.protect(utils::protection::no); no_access_range = cached.get_min_max(no_access_range); } @@ -1141,7 +1147,7 @@ namespace gl if (tex.is_dirty()) continue; - if (!tex.overlaps(rsx_address, true)) + if (!tex.overlaps(rsx_address, rsx::overlap_test_bounds::full_range)) continue; if ((rsx_address + rsx_size - tex.get_section_base()) <= tex.get_section_size()) diff --git a/rpcs3/Emu/RSX/Overlays/overlays.cpp b/rpcs3/Emu/RSX/Overlays/overlays.cpp index a0470929c2..faeeae381e 100644 --- a/rpcs3/Emu/RSX/Overlays/overlays.cpp +++ b/rpcs3/Emu/RSX/Overlays/overlays.cpp @@ -22,7 +22,7 @@ namespace rsx void user_interface::refresh() { - if (auto rsxthr = fxm::get()) + if (auto rsxthr = rsx::get_current_renderer()) { rsxthr->native_ui_flip_request.store(true); } diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp index 3ba49bebed..f0804f98d6 100644 --- a/rpcs3/Emu/RSX/RSXThread.cpp +++ b/rpcs3/Emu/RSX/RSXThread.cpp @@ -30,6 +30,7 @@ rsx::frame_capture_data frame_capture; namespace rsx { std::function g_access_violation_handler; + thread* g_current_renderer = nullptr; //TODO: Restore a working shaders cache @@ -239,10 +240,12 @@ namespace rsx thread::thread() { + g_current_renderer = this; g_access_violation_handler = [this](u32 address, bool is_writing) { return on_access_violation(address, is_writing); }; + m_rtts_dirty = true; memset(m_textures_dirty, -1, sizeof(m_textures_dirty)); memset(m_vertex_textures_dirty, -1, sizeof(m_vertex_textures_dirty)); @@ -253,6 +256,7 @@ namespace rsx thread::~thread() { g_access_violation_handler = nullptr; + g_current_renderer = nullptr; } void thread::capture_frame(const std::string &name) diff --git a/rpcs3/Emu/RSX/RSXThread.h b/rpcs3/Emu/RSX/RSXThread.h index 73a0c7d315..03f027d06a 100644 --- a/rpcs3/Emu/RSX/RSXThread.h +++ b/rpcs3/Emu/RSX/RSXThread.h @@ -322,6 +322,9 @@ namespace rsx GcmTileInfo tiles[limits::tiles_count]; GcmZcullInfo zculls[limits::zculls_count]; + //super memory map (mapped block with r/w permissions) + std::pair> super_memory_map; + bool capture_current_frame = false; void capture_frame(const std::string &name); diff --git a/rpcs3/Emu/RSX/VK/VKHelpers.cpp b/rpcs3/Emu/RSX/VK/VKHelpers.cpp index fa6cab8a32..89959852b8 100644 --- a/rpcs3/Emu/RSX/VK/VKHelpers.cpp +++ b/rpcs3/Emu/RSX/VK/VKHelpers.cpp @@ -216,6 +216,13 @@ namespace vk void set_current_renderer(const vk::render_device &device) { g_current_renderer = device; + g_cb_no_interrupt_flag.store(false); + g_drv_no_primitive_restart_flag = false; + g_drv_sanitize_fp_values = false; + g_drv_disable_fence_reset = false; + g_num_processed_frames = 0; + g_num_total_frames = 0; + const auto gpu_name = g_current_renderer.gpu().name(); //Radeon fails to properly handle degenerate primitives if primitive restart is enabled diff --git a/rpcs3/Emu/RSX/VK/VKTextureCache.h b/rpcs3/Emu/RSX/VK/VKTextureCache.h index d857d50775..bd8d10b0c5 100644 --- a/rpcs3/Emu/RSX/VK/VKTextureCache.h +++ b/rpcs3/Emu/RSX/VK/VKTextureCache.h @@ -205,26 +205,20 @@ namespace vk } template - void do_memory_transfer(void *pixels_dst, const void *pixels_src, u32 channels_count) + void do_memory_transfer(void *pixels_dst, const void *pixels_src, u32 max_length) { - if (sizeof(T) == 1) - memcpy(pixels_dst, pixels_src, cpu_address_range); + if (sizeof(T) == 1 || !swapped) + { + memcpy(pixels_dst, pixels_src, max_length); + } else { - const u32 block_size = width * height * channels_count; + const u32 block_size = max_length / sizeof(T); + auto typed_dst = (be_t *)pixels_dst; + auto typed_src = (T *)pixels_src; - if (swapped) - { - auto typed_dst = (be_t *)pixels_dst; - auto typed_src = (T *)pixels_src; - - for (u32 px = 0; px < block_size; ++px) - typed_dst[px] = typed_src[px]; - } - else - { - memcpy(pixels_dst, pixels_src, block_size * sizeof(T)); - } + for (u32 px = 0; px < block_size; ++px) + typed_dst[px] = typed_src[px]; } } @@ -249,12 +243,12 @@ namespace vk flushed = true; - void* pixels_src = dma_buffer->map(0, cpu_address_range); - void* pixels_dst = vm::base(cpu_address_base); + const auto valid_range = get_confirmed_range(); + void* pixels_src = dma_buffer->map(valid_range.first, valid_range.second); + void* pixels_dst = get_raw_ptr(valid_range.first); const auto texel_layout = vk::get_format_element_size(vram_texture->info.format); const auto elem_size = texel_layout.first; - const auto channel_count = texel_layout.second; //We have to do our own byte swapping since the driver doesnt do it for us if (real_pitch == rsx_pitch) @@ -263,10 +257,10 @@ namespace vk switch (vram_texture->info.format) { case VK_FORMAT_D32_SFLOAT_S8_UINT: - rsx::convert_le_f32_to_be_d24(pixels_dst, pixels_src, cpu_address_range >> 2, 1); + rsx::convert_le_f32_to_be_d24(pixels_dst, pixels_src, valid_range.second >> 2, 1); break; case VK_FORMAT_D24_UNORM_S8_UINT: - rsx::convert_le_d24x8_to_be_d24x8(pixels_dst, pixels_src, cpu_address_range >> 2, 1); + rsx::convert_le_d24x8_to_be_d24x8(pixels_dst, pixels_src, valid_range.second >> 2, 1); break; default: is_depth_format = false; @@ -280,19 +274,19 @@ namespace vk default: LOG_ERROR(RSX, "Invalid element width %d", elem_size); case 1: - do_memory_transfer(pixels_dst, pixels_src, channel_count); + do_memory_transfer(pixels_dst, pixels_src, valid_range.second); break; case 2: if (pack_unpack_swap_bytes) - do_memory_transfer(pixels_dst, pixels_src, channel_count); + do_memory_transfer(pixels_dst, pixels_src, valid_range.second); else - do_memory_transfer(pixels_dst, pixels_src, channel_count); + do_memory_transfer(pixels_dst, pixels_src, valid_range.second); break; case 4: if (pack_unpack_swap_bytes) - do_memory_transfer(pixels_dst, pixels_src, channel_count); + do_memory_transfer(pixels_dst, pixels_src, valid_range.second); else - do_memory_transfer(pixels_dst, pixels_src, channel_count); + do_memory_transfer(pixels_dst, pixels_src, valid_range.second); break; } } @@ -314,16 +308,17 @@ namespace vk break; } - u16 row_length = u16(width * channel_count); - rsx::scale_image_nearest(pixels_dst, pixels_src, row_length, height, rsx_pitch, real_pitch, elem_size, samples_u, samples_v, pack_unpack_swap_bytes); + const u16 row_length = u16(width * texel_layout.second); + const u16 usable_height = (valid_range.second / rsx_pitch) / samples_v; + rsx::scale_image_nearest(pixels_dst, pixels_src, row_length, usable_height, rsx_pitch, real_pitch, elem_size, samples_u, samples_v, pack_unpack_swap_bytes); switch (vram_texture->info.format) { case VK_FORMAT_D32_SFLOAT_S8_UINT: - rsx::convert_le_f32_to_be_d24(pixels_dst, pixels_dst, cpu_address_range >> 2, 1); + rsx::convert_le_f32_to_be_d24(pixels_dst, pixels_dst, valid_range.second >> 2, 1); break; case VK_FORMAT_D24_UNORM_S8_UINT: - rsx::convert_le_d24x8_to_be_d24x8(pixels_dst, pixels_dst, cpu_address_range >> 2, 1); + rsx::convert_le_d24x8_to_be_d24x8(pixels_dst, pixels_dst, valid_range.second >> 2, 1); break; } } @@ -340,6 +335,16 @@ namespace vk pack_unpack_swap_bytes = swap_bytes; } + void reprotect(utils::protection prot, const std::pair& range) + { + //Reset properties and protect again + flushed = false; + synchronized = false; + sync_timestamp = 0ull; + + protect(prot, range); + } + void reprotect(utils::protection prot) { //Reset properties and protect again @@ -896,7 +901,7 @@ namespace vk else { //TODO: Confirm byte swap patterns - region.protect(utils::protection::no); + //NOTE: Protection is handled by the caller region.set_unpack_swap_bytes((aspect_flags & VK_IMAGE_ASPECT_COLOR_BIT) == VK_IMAGE_ASPECT_COLOR_BIT); no_access_range = region.get_min_max(no_access_range); } @@ -1077,7 +1082,7 @@ namespace vk if (tex.is_dirty()) continue; - if (!tex.overlaps(rsx_address, true)) + if (!tex.overlaps(rsx_address, rsx::overlap_test_bounds::full_range)) continue; if ((rsx_address + rsx_size - tex.get_section_base()) <= tex.get_section_size()) diff --git a/rpcs3/Emu/RSX/rsx_cache.h b/rpcs3/Emu/RSX/rsx_cache.h index b120d1137f..d045d01fc4 100644 --- a/rpcs3/Emu/RSX/rsx_cache.h +++ b/rpcs3/Emu/RSX/rsx_cache.h @@ -7,13 +7,22 @@ #include "Emu/Cell/Modules/cellMsgDialog.h" #include "Emu/System.h" +#include "rsx_utils.h" + namespace rsx { enum protection_policy { - protect_policy_one_page, //Only guard one page, preferably one where this section 'wholly' fits + protect_policy_one_page, //Only guard one page, preferrably one where this section 'wholly' fits protect_policy_conservative, //Guards as much memory as possible that is guaranteed to only be covered by the defined range without sharing - protect_policy_full_range //Guard the full memory range. Shared pages may be invalidated by access outside the object we're guarding + protect_policy_full_range //Guard the full memory range. Shared pages may be invalidated by access outside the object we're guarding + }; + + enum overlap_test_bounds + { + full_range, + protected_range, + confirmed_range }; class buffered_section @@ -21,6 +30,21 @@ namespace rsx private: u32 locked_address_base = 0; u32 locked_address_range = 0; + weak_ptr locked_memory_ptr; + std::pair confirmed_range; + + inline void tag_memory() + { + if (locked_memory_ptr) + { + const u32 valid_limit = (confirmed_range.second) ? confirmed_range.first + confirmed_range.second : cpu_address_range; + u32* first = locked_memory_ptr.get(confirmed_range.first); + u32* last = locked_memory_ptr.get(valid_limit - 4); + + *first = cpu_address_base + confirmed_range.first; + *last = cpu_address_base + valid_limit - 4; + } + } protected: u32 cpu_address_base = 0; @@ -37,21 +61,11 @@ namespace rsx return (base1 < limit2 && base2 < limit1); } - public: - - buffered_section() {} - ~buffered_section() {} - - void reset(u32 base, u32 length, protection_policy protect_policy = protect_policy_full_range) + inline void init_lockable_range(u32 base, u32 length) { - verify(HERE), locked == false; - - cpu_address_base = base; - cpu_address_range = length; - locked_address_base = (base & ~4095); - if ((protect_policy != protect_policy_full_range) && (length >= 4096)) + if ((guard_policy != protect_policy_full_range) && (length >= 4096)) { const u32 limit = base + length; const u32 block_end = (limit & ~4095); @@ -64,7 +78,7 @@ namespace rsx //Page boundaries cover at least one unique page locked_address_base = block_start; - if (protect_policy == protect_policy_conservative) + if (guard_policy == protect_policy_conservative) { //Protect full unique range locked_address_range = (block_end - block_start); @@ -75,24 +89,83 @@ namespace rsx locked_address_range = align(base + length, 4096) - locked_address_base; verify(HERE), locked_address_range > 0; + } + + public: + + buffered_section() {} + ~buffered_section() {} + + void reset(u32 base, u32 length, protection_policy protect_policy = protect_policy_full_range) + { + verify(HERE), locked == false; + + cpu_address_base = base; + cpu_address_range = length; + + confirmed_range = { 0, 0 }; protection = utils::protection::rw; guard_policy = protect_policy; locked = false; + + init_lockable_range(cpu_address_base, cpu_address_range); } void protect(utils::protection prot) { if (prot == protection) return; + verify(HERE), locked_address_range > 0; utils::memory_protect(vm::base(locked_address_base), locked_address_range, prot); protection = prot; locked = prot != utils::protection::rw; + + if (prot == utils::protection::no) + { + locked_memory_ptr = rsx::get_super_ptr(cpu_address_base, cpu_address_range); + tag_memory(); + } + else + { + if (!locked) + { + //Unprotect range also invalidates secured range + confirmed_range = { 0, 0 }; + } + + locked_memory_ptr = {}; + } + } + + void protect(utils::protection prot, const std::pair& range_confirm) + { + if (prot != utils::protection::rw) + { + const auto old_prot = protection; + const auto old_locked_base = locked_address_base; + const auto old_locked_length = locked_address_range; + protection = utils::protection::rw; + + if (confirmed_range.second) + { + const u32 range_limit = std::max(range_confirm.first + range_confirm.second, confirmed_range.first + confirmed_range.second); + confirmed_range.first = std::min(confirmed_range.first, range_confirm.first); + confirmed_range.second = range_limit - confirmed_range.first; + } + else + { + confirmed_range = range_confirm; + } + + init_lockable_range(confirmed_range.first + cpu_address_base, confirmed_range.second); + } + + protect(prot); } void unprotect() { protect(utils::protection::rw); - locked = false; } void discard() @@ -112,27 +185,55 @@ namespace rsx return region_overlaps(locked_address_base, locked_address_base + locked_address_range, range.first, range.first + range.second); } - bool overlaps(u32 address, bool ignore_protection_range) const + bool overlaps(u32 address, overlap_test_bounds bounds) const { - if (!ignore_protection_range) - return (locked_address_base <= address && (address - locked_address_base) < locked_address_range); - else + switch (bounds) + { + case overlap_test_bounds::full_range: + { return (cpu_address_base <= address && (address - cpu_address_base) < cpu_address_range); + } + case overlap_test_bounds::protected_range: + { + return (locked_address_base <= address && (address - locked_address_base) < locked_address_range); + } + case overlap_test_bounds::confirmed_range: + { + const auto range = get_confirmed_range(); + return ((range.first + cpu_address_base) <= address && (address - range.first) < range.second); + } + default: + fmt::throw_exception("Unreachable" HERE); + } } - bool overlaps(std::pair range, bool ignore_protection_range) const + bool overlaps(const std::pair& range, overlap_test_bounds bounds) const { - if (!ignore_protection_range) - return region_overlaps(locked_address_base, locked_address_base + locked_address_range, range.first, range.first + range.second); - else + switch (bounds) + { + case overlap_test_bounds::full_range: + { return region_overlaps(cpu_address_base, cpu_address_base + cpu_address_range, range.first, range.first + range.second); + } + case overlap_test_bounds::protected_range: + { + return region_overlaps(locked_address_base, locked_address_base + locked_address_range, range.first, range.first + range.second); + } + case overlap_test_bounds::confirmed_range: + { + const auto test_range = get_confirmed_range(); + return region_overlaps(test_range.first + cpu_address_base, test_range.first + cpu_address_base + test_range.second, range.first, range.first + range.second); + } + default: + fmt::throw_exception("Unreachable" HERE); + } } /** * Check if the page containing the address tramples this section. Also compares a former trampled page range to compare * If true, returns the range with updated invalid range */ - std::tuple> overlaps_page(std::pair old_range, u32 address, bool full_range_check) const + std::tuple> overlaps_page(const std::pair& old_range, u32 address, overlap_test_bounds bounds) const { const u32 page_base = address & ~4095; const u32 page_limit = address + 4096; @@ -141,16 +242,29 @@ namespace rsx const u32 compare_max = std::max(old_range.second, page_limit); u32 memory_base, memory_range; - if (full_range_check && guard_policy != protection_policy::protect_policy_full_range) + switch (bounds) + { + case overlap_test_bounds::full_range: { - //Make sure protection range is full range memory_base = (cpu_address_base & ~4095); memory_range = align(cpu_address_base + cpu_address_range, 4096u) - memory_base; + break; } - else + case overlap_test_bounds::protected_range: { memory_base = locked_address_base; memory_range = locked_address_range; + break; + } + case overlap_test_bounds::confirmed_range: + { + const auto range = get_confirmed_range(); + memory_base = (cpu_address_base + range.first) & ~4095; + memory_range = align(cpu_address_base + range.first + range.second, 4096u) - memory_base; + break; + } + default: + fmt::throw_exception("Unreachable" HERE); } if (!region_overlaps(memory_base, memory_base + memory_range, compare_min, compare_max)) @@ -191,7 +305,7 @@ namespace rsx return (cpu_address_base == cpu_address && cpu_address_range == size); } - std::pair get_min_max(std::pair current_min_max) const + std::pair get_min_max(const std::pair& current_min_max) const { u32 min = std::min(current_min_max.first, locked_address_base); u32 max = std::max(current_min_max.second, locked_address_base + locked_address_range); @@ -203,6 +317,46 @@ namespace rsx { return protection; } + + template + T* get_raw_ptr(u32 offset = 0) const + { + verify(HERE), locked_memory_ptr; + return locked_memory_ptr.get(offset); + } + + bool test_memory_head() const + { + if (!locked_memory_ptr) + { + return false; + } + + const u32* first = locked_memory_ptr.get(confirmed_range.first); + return (*first == (cpu_address_base + confirmed_range.first)); + } + + bool test_memory_tail() const + { + if (!locked_memory_ptr) + { + return false; + } + + const u32 valid_limit = (confirmed_range.second) ? confirmed_range.first + confirmed_range.second : cpu_address_range; + const u32* last = locked_memory_ptr.get(valid_limit - 4); + return (*last == (cpu_address_base + valid_limit - 4)); + } + + std::pair get_confirmed_range() const + { + if (confirmed_range.second == 0) + { + return { 0, cpu_address_range }; + } + + return confirmed_range; + } }; template diff --git a/rpcs3/Emu/RSX/rsx_utils.cpp b/rpcs3/Emu/RSX/rsx_utils.cpp index b1bc398293..e18898723e 100644 --- a/rpcs3/Emu/RSX/rsx_utils.cpp +++ b/rpcs3/Emu/RSX/rsx_utils.cpp @@ -1,6 +1,7 @@ #include "stdafx.h" #include "rsx_utils.h" #include "rsx_methods.h" +#include "RSXThread.h" #include "Emu/RSX/GCM.h" #include "Common/BufferUtils.h" #include "Overlays/overlays.h" @@ -75,6 +76,49 @@ namespace rsx } } + weak_ptr get_super_ptr(u32 addr, u32 len) + { + verify(HERE), g_current_renderer; + + if (!g_current_renderer->super_memory_map.first) + { + auto block = vm::get(vm::any, 0xC0000000); + if (block) + { + g_current_renderer->super_memory_map.first = block->used(); + g_current_renderer->super_memory_map.second = vm::get_super_ptr(0xC0000000, g_current_renderer->super_memory_map.first - 1); + + if (!g_current_renderer->super_memory_map.second) + { + //Disjoint allocation? + LOG_ERROR(RSX, "Could not initialize contiguous RSX super-memory"); + } + } + else + { + fmt::throw_exception("RSX memory not mapped!"); + } + } + + if (g_current_renderer->super_memory_map.second) + { + if (addr >= 0xC0000000 && (addr + len) <= (0xC0000000 + g_current_renderer->super_memory_map.first)) + { + //RSX local + return { g_current_renderer->super_memory_map.second.get() + (addr - 0xC0000000) }; + } + } + + auto result = vm::get_super_ptr(addr, len - 1); + if (!result) + { + //Probably allocated as split blocks?? + LOG_ERROR(RSX, "Could not get super_ptr for memory block 0x%x+0x%x", addr, len); + } + + return { result }; + } + /* Fast image scaling routines * Only uses fast nearest scaling and integral scaling factors * T - Dst type diff --git a/rpcs3/Emu/RSX/rsx_utils.h b/rpcs3/Emu/RSX/rsx_utils.h index 7af460b4b3..c52ea6d6a4 100644 --- a/rpcs3/Emu/RSX/rsx_utils.h +++ b/rpcs3/Emu/RSX/rsx_utils.h @@ -4,6 +4,7 @@ #include "Utilities/geometry.h" #include "gcm_enums.h" #include +#include // TODO: replace the code below by #include when C++17 or newer will be used #include @@ -20,6 +21,9 @@ extern "C" namespace rsx { + class thread; + extern thread* g_current_renderer; + //Base for resources with reference counting struct ref_counted { @@ -28,6 +32,43 @@ namespace rsx void reset_refs() { deref_count = 0; } }; + //Weak pointer without lock semantics + //Backed by a real shared_ptr for non-rsx memory + //Backed by a global shared pool for rsx memory + struct weak_ptr + { + void* _ptr; + std::shared_ptr _extern; + + weak_ptr(void* raw, bool is_rsx_mem = true) + { + _ptr = raw; + if (!is_rsx_mem) _extern.reset((u8*)raw); + } + + weak_ptr(std::shared_ptr& block) + { + _extern = block; + _ptr = _extern.get(); + } + + weak_ptr() + { + _ptr = nullptr; + } + + template + T* get(u32 offset = 0) const + { + return (T*)((u8*)_ptr + offset); + } + + operator bool() const + { + return (_ptr != nullptr); + } + }; + //Holds information about a framebuffer struct gcm_framebuffer_info { @@ -289,6 +330,9 @@ namespace rsx std::array get_constant_blend_colors(); + // Acquire memory mirror with r/w permissions + weak_ptr get_super_ptr(u32 addr, u32 size); + /** * Shuffle texel layout from xyzw to wzyx * TODO: Variable src/dst and optional se conversion @@ -498,4 +542,9 @@ namespace rsx result.a = ((colorref >> 24) & 0xFF) / 255.f; return result; } + + static inline thread* get_current_renderer() + { + return g_current_renderer; + } } diff --git a/rpcs3/emucore.vcxproj b/rpcs3/emucore.vcxproj index e1156e9bfe..5b6ca8b473 100644 --- a/rpcs3/emucore.vcxproj +++ b/rpcs3/emucore.vcxproj @@ -424,7 +424,7 @@ - + diff --git a/rpcs3/rpcs3qt/rsx_debugger.cpp b/rpcs3/rpcs3qt/rsx_debugger.cpp index b4298a5aa7..e22aae163b 100644 --- a/rpcs3/rpcs3qt/rsx_debugger.cpp +++ b/rpcs3/rpcs3qt/rsx_debugger.cpp @@ -213,7 +213,7 @@ rsx_debugger::rsx_debugger(std::shared_ptr gui_settings, QWidget* //Events connect(b_goto_get, &QAbstractButton::clicked, [=] { - if (const auto render = fxm::get()) + if (const auto render = rsx::get_current_renderer()) { u32 realAddr; if (RSXIOMem.getRealAddr(render->ctrl->get.load(), realAddr)) @@ -225,7 +225,7 @@ rsx_debugger::rsx_debugger(std::shared_ptr gui_settings, QWidget* }); connect(b_goto_put, &QAbstractButton::clicked, [=] { - if (const auto render = fxm::get()) + if (const auto render = rsx::get_current_renderer()) { u32 realAddr; if (RSXIOMem.getRealAddr(render->ctrl->put.load(), realAddr)) @@ -377,7 +377,7 @@ void Buffer::showImage(const QImage& image) void Buffer::ShowWindowed() { - const auto render = fxm::get(); + const auto render = rsx::get_current_renderer(); if (!render) return; @@ -667,7 +667,7 @@ void rsx_debugger::GetMemory() void rsx_debugger::GetBuffers() { - const auto render = fxm::get(); + const auto render = rsx::get_current_renderer(); if (!render) { return; @@ -744,7 +744,7 @@ void rsx_debugger::GetBuffers() void rsx_debugger::GetFlags() { - const auto render = fxm::get(); + const auto render = rsx::get_current_renderer(); if (!render) { return; @@ -781,7 +781,7 @@ void rsx_debugger::GetFlags() void rsx_debugger::GetLightning() { - const auto render = fxm::get(); + const auto render = rsx::get_current_renderer(); if (!render) { return; @@ -800,7 +800,7 @@ void rsx_debugger::GetLightning() void rsx_debugger::GetTexture() { - const auto render = fxm::get(); + const auto render = rsx::get_current_renderer(); if (!render) { return; @@ -843,7 +843,7 @@ void rsx_debugger::GetTexture() void rsx_debugger::GetSettings() { - const auto render = fxm::get(); + const auto render = rsx::get_current_renderer(); if (!render) { return; @@ -942,7 +942,7 @@ void rsx_debugger::SetFlags() void rsx_debugger::SetPrograms() { - const auto render = fxm::get(); + const auto render = rsx::get_current_renderer(); if (!render) { return;