From 8fcd5c1e5a83a5159525cf8c723cd701ec8e26c0 Mon Sep 17 00:00:00 2001
From: kd-11 <karokidii@gmail.com>
Date: Thu, 10 May 2018 14:50:32 +0300
Subject: [PATCH] rsx: Texture cache fixes

1. rsx: Rework section synchronization using the new memory mirrors
2. rsx: Tweaks
    - Simplify peeking into the current rsx::thread instance.
      Use a simple rsx::get_current_renderer instead of asking fxm for the same
    - Fix global rsx super memory shm block management
3. rsx: Improve memory validation. test_framebuffer() and tag_framebuffer()
   are simplified due to mirror support
4. rsx: Only write back confirmed memory range to avoid overapproximation
   errors in blit engine
5. rsx: Explicitly mark clobbered flushable sections as dirty to have them
   removed
6. rsx: Cumulative fixes
    - Reimplement rsx::buffered_section management routines
    - blit engine subsections are not hit-tested against confirmed/committed
      memory range.
      Not all applications are 'honest' about region bounds, making the real
      cpu range useless for blit ops

---
 rpcs3/Emu/Cell/Modules/cellGcmSys.cpp |  46 ++--
 rpcs3/Emu/Cell/lv2/sys_rsx.cpp        |   4 +-
 rpcs3/Emu/RSX/Common/texture_cache.h  | 300 +++++++++++---------------
 rpcs3/Emu/RSX/GL/GLTextureCache.h     |  32 +--
 rpcs3/Emu/RSX/Overlays/overlays.cpp   |   2 +-
 rpcs3/Emu/RSX/RSXThread.cpp           |   4 +
 rpcs3/Emu/RSX/RSXThread.h             |   3 +
 rpcs3/Emu/RSX/VK/VKHelpers.cpp        |   7 +
 rpcs3/Emu/RSX/VK/VKTextureCache.h     |  69 +++---
 rpcs3/Emu/RSX/rsx_cache.h             | 212 +++++++++++++++---
 rpcs3/Emu/RSX/rsx_utils.cpp           |  44 ++++
 rpcs3/Emu/RSX/rsx_utils.h             |  49 +++++
 rpcs3/emucore.vcxproj                 |   2 +-
 rpcs3/rpcs3qt/rsx_debugger.cpp        |  18 +-
 14 files changed, 512 insertions(+), 280 deletions(-)
diff --git a/rpcs3/Emu/Cell/Modules/cellGcmSys.cpp b/rpcs3/Emu/Cell/Modules/cellGcmSys.cpp
index 1e7ed43680..b3692c547e 100644
--- a/rpcs3/Emu/Cell/Modules/cellGcmSys.cpp
+++ b/rpcs3/Emu/Cell/Modules/cellGcmSys.cpp
@@ -275,7 +275,7 @@ s32 cellGcmBindTile(u8 index)
 		return CELL_GCM_ERROR_INVALID_VALUE;
 	}
 
-	fxm::get<GSRender>()->tiles[index].binded = true;
+	rsx::get_current_renderer()->tiles[index].binded = true;
 
 	return CELL_OK;
 }
@@ -291,7 +291,7 @@ s32 cellGcmBindZcull(u8 index, u32 offset, u32 width, u32 height, u32 cullStart,
 		return CELL_GCM_ERROR_INVALID_VALUE;
 	}
 
-	fxm::get<GSRender>()->zculls[index].binded = true;
+	rsx::get_current_renderer()->zculls[index].binded = true;
 
 	return CELL_OK;
 }
@@ -307,7 +307,7 @@ void cellGcmGetConfiguration(vm::ptr<CellGcmConfig> config)
 
 u32 cellGcmGetFlipStatus()
 {
-	u32 status = fxm::get<GSRender>()->flip_status;
+	u32 status = rsx::get_current_renderer()->flip_status;
 
 	cellGcmSys.trace("cellGcmGetFlipStatus() -> %d", status);
 
@@ -421,7 +421,7 @@ s32 _cellGcmInitBody(vm::pptr<CellGcmContextData> context, u32 cmdSize, u32 ioSi
 	ctrl.get = 0;
 	ctrl.ref = -1;
 
-	const auto render = fxm::get<GSRender>();
+	const auto render = rsx::get_current_renderer();
 	render->intr_thread = idm::make_ptr<ppu_thread>("_gcm_intr_thread", 1, 0x4000);
 	render->intr_thread->run();
 	render->main_mem_addr = 0;
@@ -436,7 +436,7 @@ void cellGcmResetFlipStatus()
 {
 	cellGcmSys.trace("cellGcmResetFlipStatus()");
 
-	fxm::get<GSRender>()->flip_status = CELL_GCM_DISPLAY_FLIP_STATUS_WAITING;
+	rsx::get_current_renderer()->flip_status = CELL_GCM_DISPLAY_FLIP_STATUS_WAITING;
 }
 
 void cellGcmSetDebugOutputLevel(s32 level)
@@ -448,7 +448,7 @@ void cellGcmSetDebugOutputLevel(s32 level)
 	case CELL_GCM_DEBUG_LEVEL0:
 	case CELL_GCM_DEBUG_LEVEL1:
 	case CELL_GCM_DEBUG_LEVEL2:
-		fxm::get<GSRender>()->debug_level = level;
+		rsx::get_current_renderer()->debug_level = level;
 		break;
 
 	default:
@@ -470,7 +470,7 @@ s32 cellGcmSetDisplayBuffer(u8 id, u32 offset, u32 pitch, u32 width, u32 height)
 		return CELL_GCM_ERROR_FAILURE;
 	}
 
-	const auto render = fxm::get<GSRender>();
+	const auto render = rsx::get_current_renderer();
 
 	auto buffers = render->display_buffers;
 
@@ -496,21 +496,21 @@ void cellGcmSetFlipHandler(vm::ptr<void(u32)> handler)
 {
 	cellGcmSys.warning("cellGcmSetFlipHandler(handler=*0x%x)", handler);
 
-	fxm::get<GSRender>()->flip_handler = handler;
+	rsx::get_current_renderer()->flip_handler = handler;
 }
 
 void cellGcmSetFlipMode(u32 mode)
 {
 	cellGcmSys.warning("cellGcmSetFlipMode(mode=%d)", mode);
 
-	fxm::get<GSRender>()->requested_vsync.store(mode == CELL_GCM_DISPLAY_VSYNC);
+	rsx::get_current_renderer()->requested_vsync.store(mode == CELL_GCM_DISPLAY_VSYNC);
 }
 
 void cellGcmSetFlipStatus()
 {
 	cellGcmSys.warning("cellGcmSetFlipStatus()");
 
-	fxm::get<GSRender>()->flip_status = CELL_GCM_DISPLAY_FLIP_STATUS_DONE;
+	rsx::get_current_renderer()->flip_status = CELL_GCM_DISPLAY_FLIP_STATUS_DONE;
 }
 
 s32 cellGcmSetPrepareFlip(ppu_thread& ppu, vm::ptr<CellGcmContextData> ctxt, u32 id)
@@ -561,7 +561,7 @@ void cellGcmSetSecondVFrequency(u32 freq)
 {
 	cellGcmSys.warning("cellGcmSetSecondVFrequency(level=%d)", freq);
 
-	const auto render = fxm::get<GSRender>();
+	const auto render = rsx::get_current_renderer();
 
 	switch (freq)
 	{
@@ -612,7 +612,7 @@ s32 cellGcmSetTileInfo(u8 index, u8 location, u32 offset, u32 size, u32 pitch, u
 		cellGcmSys.error("cellGcmSetTileInfo: bad compression mode! (%d)", comp);
 	}
 
-	const auto render = fxm::get<GSRender>();
+	const auto render = rsx::get_current_renderer();
 
 	auto& tile = render->tiles[index];
 	tile.location = location;
@@ -631,7 +631,7 @@ void cellGcmSetUserHandler(vm::ptr<void(u32)> handler)
 {
 	cellGcmSys.warning("cellGcmSetUserHandler(handler=*0x%x)", handler);
 
-	fxm::get<GSRender>()->user_handler = handler;
+	rsx::get_current_renderer()->user_handler = handler;
 }
 
 void cellGcmSetUserCommand(vm::ptr<CellGcmContextData> ctxt, u32 cause)
@@ -643,7 +643,7 @@ void cellGcmSetVBlankHandler(vm::ptr<void(u32)> handler)
 {
 	cellGcmSys.warning("cellGcmSetVBlankHandler(handler=*0x%x)", handler);
 
-	fxm::get<GSRender>()->vblank_handler = handler;
+	rsx::get_current_renderer()->vblank_handler = handler;
 }
 
 void cellGcmSetWaitFlip(vm::ptr<CellGcmContextData> ctxt)
@@ -675,7 +675,7 @@ void cellGcmSetZcull(u8 index, u32 offset, u32 width, u32 height, u32 cullStart,
 		return;
 	}
 
-	const auto render = fxm::get<GSRender>();
+	const auto render = rsx::get_current_renderer();
 
 	auto& zcull = render->zculls[index];
 	zcull.offset = offset;
@@ -703,7 +703,7 @@ s32 cellGcmUnbindTile(u8 index)
 		return CELL_GCM_ERROR_INVALID_VALUE;
 	}
 
-	fxm::get<GSRender>()->tiles[index].binded = false;
+	rsx::get_current_renderer()->tiles[index].binded = false;
 
 	return CELL_OK;
 }
@@ -718,7 +718,7 @@ s32 cellGcmUnbindZcull(u8 index)
 		return CELL_GCM_ERROR_INVALID_VALUE;
 	}
 
-	fxm::get<GSRender>()->zculls[index].binded = false;
+	rsx::get_current_renderer()->zculls[index].binded = false;
 
 	return CELL_OK;
 }
@@ -754,7 +754,7 @@ s32 cellGcmGetCurrentDisplayBufferId(vm::ptr<u8> id)
 {
 	cellGcmSys.warning("cellGcmGetCurrentDisplayBufferId(id=*0x%x)", id);
 
-	if ((*id = fxm::get<GSRender>()->current_display_buffer) > UINT8_MAX)
+	if ((*id = rsx::get_current_renderer()->current_display_buffer) > UINT8_MAX)
 	{
 		fmt::throw_exception("Unexpected" HERE);
 	}
@@ -788,7 +788,7 @@ u64 cellGcmGetLastFlipTime()
 {
 	cellGcmSys.trace("cellGcmGetLastFlipTime()");
 
-	return fxm::get<GSRender>()->last_flip_time;
+	return rsx::get_current_renderer()->last_flip_time;
 }
 
 u64 cellGcmGetLastSecondVTime()
@@ -801,7 +801,7 @@ u64 cellGcmGetVBlankCount()
 {
 	cellGcmSys.trace("cellGcmGetVBlankCount()");
 
-	return fxm::get<GSRender>()->vblank_count;
+	return rsx::get_current_renderer()->vblank_count;
 }
 
 s32 cellGcmSysGetLastVBlankTime()
@@ -933,7 +933,7 @@ s32 gcmMapEaIoAddress(u32 ea, u32 io, u32 size, bool is_strict)
 {
 	if ((ea & 0xFFFFF) || (io & 0xFFFFF) || (size & 0xFFFFF)) return CELL_GCM_ERROR_FAILURE;
 
-	const auto render = fxm::get<GSRender>();
+	const auto render = rsx::get_current_renderer();
 
 	// Check if the mapping was successfull
 	if (RSXIOMem.Map(ea, size, io))
@@ -997,7 +997,7 @@ s32 cellGcmMapMainMemory(u32 ea, u32 size, vm::ptr<u32> offset)
 
 	u32 io = RSXIOMem.Map(ea, size);
 
-	const auto render = fxm::get<GSRender>();
+	const auto render = rsx::get_current_renderer();
 
 	//check if the mapping was successfull
 	if (RSXIOMem.RealAddr(io) == ea)
@@ -1237,7 +1237,7 @@ s32 cellGcmSetTile(u8 index, u8 location, u32 offset, u32 size, u32 pitch, u8 co
 		cellGcmSys.error("cellGcmSetTile: bad compression mode! (%d)", comp);
 	}
 
-	const auto render = fxm::get<GSRender>();
+	const auto render = rsx::get_current_renderer();
 
 	auto& tile = render->tiles[index];
 	tile.location = location;
diff --git a/rpcs3/Emu/Cell/lv2/sys_rsx.cpp b/rpcs3/Emu/Cell/lv2/sys_rsx.cpp
index a41ff71c58..edf165bc30 100644
--- a/rpcs3/Emu/Cell/lv2/sys_rsx.cpp
+++ b/rpcs3/Emu/Cell/lv2/sys_rsx.cpp
@@ -150,7 +150,7 @@ s32 sys_rsx_context_allocate(vm::ptr<u32> context_id, vm::ptr<u64> lpar_dma_cont
 
 	m_sysrsx->rsx_event_port = queueId->value();
 
-	const auto render = fxm::get<GSRender>();
+	const auto render = rsx::get_current_renderer();
 	render->display_buffers_count = 0;
 	render->current_display_buffer = 0;
 	render->main_mem_addr = 0;
@@ -222,7 +222,7 @@ s32 sys_rsx_context_attribute(s32 context_id, u32 package_id, u64 a3, u64 a4, u6
 
 	// todo: these event ports probly 'shouldnt' be here as i think its supposed to be interrupts that are sent from rsx somewhere in lv1
 
-	const auto render = fxm::get<GSRender>();
+	const auto render = rsx::get_current_renderer();
 
 	//hle protection
 	if (render->isHLE)
diff --git a/rpcs3/Emu/RSX/Common/texture_cache.h b/rpcs3/Emu/RSX/Common/texture_cache.h
index 5c9181fd8d..5f031ec5d8 100644
--- a/rpcs3/Emu/RSX/Common/texture_cache.h
+++ b/rpcs3/Emu/RSX/Common/texture_cache.h
@@ -271,8 +271,7 @@ namespace rsx
 		{
 			bool violation_handled = false;
 			std::vector<section_storage_type*> sections_to_flush; //Sections to be flushed
-			std::vector<section_storage_type*> sections_to_reprotect; //Sections to be protected after flushing
-			std::vector<section_storage_type*> sections_to_unprotect; //These sections are to be unprotected and discarded by caller
+			std::vector<section_storage_type*> sections_to_unprotect; //These sections are to be unprotected and discarded by caller
 			int num_flushable = 0;
 			u64 cache_tag = 0;
 			u32 address_base = 0;
@@ -465,7 +464,7 @@ namespace rsx
 			{
 				for (auto &tex : found->second.data)
 				{
-					if (tex.is_locked() && tex.overlaps(address, false))
+					if (tex.is_locked() && tex.overlaps(address, rsx::overlap_test_bounds::protected_range))
 						return{ tex.get_protection(), &tex };
 				}
 			}
@@ -476,7 +475,7 @@ namespace rsx
 			{
 				for (auto &tex : found->second.data)
 				{
-					if (tex.is_locked() && tex.overlaps(address, false))
+					if (tex.is_locked() && tex.overlaps(address, rsx::overlap_test_bounds::protected_range))
 						return{ tex.get_protection(), &tex };
 				}
 			}
@@ -541,7 +540,11 @@ namespace rsx
 					if (tex.cache_tag == cache_tag) continue; //already processed
 					if (!tex.is_locked()) continue;	//flushable sections can be 'clean' but unlocked. TODO: Handle this better
 
-					auto overlapped = tex.overlaps_page(trampled_range, address, strict_range_check || tex.get_context() == rsx::texture_upload_context::blit_engine_dst);
+					const auto bounds_test = (strict_range_check || tex.get_context() == rsx::texture_upload_context::blit_engine_dst) ?
+						rsx::overlap_test_bounds::full_range :
+						rsx::overlap_test_bounds::protected_range;
+
+					auto overlapped = tex.overlaps_page(trampled_range, address, bounds_test);
 					if (std::get<0>(overlapped))
 					{
 						auto &new_range = std::get<1>(overlapped);
@@ -598,61 +601,94 @@ namespace rsx
 					}
 				}
 
-				std::vector<utils::protection> reprotections;
 				for (auto &obj : trampled_set)
 				{
-					bool to_reprotect = false;
-
-					if (!deferred_flush && !discard_only)
+					if (!discard_only)
 					{
-						if (!is_writing && obj.first->get_protection() != utils::protection::no)
+						bool collateral = false;
+						if (!deferred_flush)
 						{
-							to_reprotect = true;
-						}
-						else
-						{
-							if (rebuild_cache && allow_flush && obj.first->is_flushable())
+							if (!is_writing && obj.first->get_protection() != utils::protection::no)
 							{
-								const std::pair<u32, u32> null_check = std::make_pair(UINT32_MAX, 0);
-								to_reprotect = !std::get<0>(obj.first->overlaps_page(null_check, address, true));
+								collateral = true;
+							}
+							else
+							{
+								if (rebuild_cache && allow_flush && obj.first->is_flushable())
+								{
+									const std::pair<u32, u32> null_check = std::make_pair(UINT32_MAX, 0);
+									collateral = !std::get<0>(obj.first->overlaps_page(null_check, address, rsx::overlap_test_bounds::full_range));
+								}
 							}
 						}
+
+						if (collateral)
+						{
+							//False positive
+							continue;
+						}
+						else if (obj.first->is_flushable())
+						{
+							//Write if and only if no one else has trashed section memory already
+							//TODO: Proper section management should prevent this from happening
+							//TODO: Blit engine section merge support and/or partial texture memory buffering
+							if (!obj.first->test_memory_head() || !obj.first->test_memory_tail())
+							{
+								if (obj.first->get_memory_read_flags() == rsx::memory_read_flags::flush_always)
+								{
+									//Someone decided to overwrite memory specified as an active framebuffer
+									m_flush_always_cache.erase(obj.first->get_section_base());
+								}
+
+								//Contents clobbered, destroy this
+								obj.first->set_dirty(true);
+								m_unreleased_texture_objects++;
+
+								result.sections_to_unprotect.push_back(obj.first);
+							}
+							else if (!allow_flush)
+							{
+								result.sections_to_flush.push_back(obj.first);
+							}
+							else
+							{
+								if (!obj.first->flush(std::forward<Args>(extras)...))
+								{
+									//Missed address, note this
+									//TODO: Lower severity when successful to keep the cache from overworking
+									record_cache_miss(*obj.first);
+								}
+
+								m_num_flush_requests++;
+								result.sections_to_unprotect.push_back(obj.first);
+							}
+
+							continue;
+						}
+						else if (deferred_flush)
+						{
+							//allow_flush = false and not synchronized
+							result.sections_to_unprotect.push_back(obj.first);
+							continue;
+						}
 					}
 
-					if (to_reprotect)
-					{
-						result.sections_to_reprotect.push_back(obj.first);
-						reprotections.push_back(obj.first->get_protection());
-					}
-					else if (obj.first->is_flushable())
-					{
-						result.sections_to_flush.push_back(obj.first);
-					}
-					else if (!deferred_flush)
+					if (!obj.first->is_flushable())
 					{
 						obj.first->set_dirty(true);
 						m_unreleased_texture_objects++;
 					}
-					else
-					{
-						result.sections_to_unprotect.push_back(obj.first);
-					}
-
-					if (deferred_flush)
-						continue;
 
+					//Only unsynchronized (no-flush) sections should reach here, and only if the rendering thread is the caller
 					if (discard_only)
 						obj.first->discard();
 					else
 						obj.first->unprotect();
 
-					if (!to_reprotect)
-					{
-						obj.second->remove_one();
-					}
+					obj.second->remove_one();
 				}
 
-				if (deferred_flush)
+				if (deferred_flush && result.sections_to_flush.size())
 				{
 					result.num_flushable = static_cast<int>(result.sections_to_flush.size());
 					result.address_base = address;
@@ -660,33 +696,16 @@ namespace rsx
 					result.cache_tag = m_cache_update_tag.load(std::memory_order_consume);
 					return result;
 				}
-
-				if (result.sections_to_flush.size() > 0)
+				else
 				{
-					verify(HERE), allow_flush;
-
-					// Flush here before 'reprotecting' since flushing will write the whole span
-					for (const auto &tex : result.sections_to_flush)
+					//Flushes happen in one go, now it's time to remove protection
+					for (auto& section : result.sections_to_unprotect)
 					{
-						if (!tex->flush(std::forward<Args>(extras)...))
-						{
-							//Missed address, note this
-							//TODO: Lower severity when successful to keep the cache from overworking
-							record_cache_miss(*tex);
-						}
-
-						m_num_flush_requests++;
+						section->unprotect();
+						m_cache[get_block_address(section->get_section_base())].remove_one();
 					}
 				}
 
-				int n = 0;
-				for (auto &tex: result.sections_to_reprotect)
-				{
-					tex->discard();
-					tex->protect(reprotections[n++]);
-					tex->set_dirty(false);
-				}
-
 				//Everything has been handled
 				result = {};
 				result.violation_handled = true;
@@ -807,7 +826,7 @@ namespace rsx
 					if (tex.get_section_base() > rsx_address)
 						continue;
 
-					if (!tex.is_dirty() && tex.overlaps(test, true))
+					if (!tex.is_dirty() && tex.overlaps(test, rsx::overlap_test_bounds::full_range))
 						results.push_back(&tex);
 				}
 			}
@@ -1076,7 +1095,7 @@ namespace rsx
 					if (tex.is_dirty()) continue;
 					if (!tex.is_flushable()) continue;
 
-					if (tex.overlaps(address, false))
+					if (tex.overlaps(address, rsx::overlap_test_bounds::protected_range))
 						return std::make_tuple(true, &tex);
 				}
 			}
@@ -1100,7 +1119,7 @@ namespace rsx
 					if (tex.is_dirty()) continue;
 					if (!tex.is_flushable()) continue;
 
-					if (tex.overlaps(address, false))
+					if (tex.overlaps(address, rsx::overlap_test_bounds::protected_range))
 						return std::make_tuple(true, &tex);
 				}
 			}
@@ -1138,20 +1157,21 @@ namespace rsx
 
 			if (m_cache_update_tag.load(std::memory_order_consume) == data.cache_tag)
 			{
-				std::vector<utils::protection> old_protections;
-				for (auto &tex : data.sections_to_reprotect)
+				//1. Write memory to cpu side
+				for (auto &tex : data.sections_to_flush)
 				{
 					if (tex->is_locked())
 					{
-						old_protections.push_back(tex->get_protection());
-						tex->unprotect();
-					}
-					else
-					{
-						old_protections.push_back(utils::protection::rw);
+						if (!tex->flush(std::forward<Args>(extras)...))
+						{
+							record_cache_miss(*tex);
+						}
+
+						m_num_flush_requests++;
 					}
 				}
 
+				//2. Release all obsolete sections
 				for (auto &tex : data.sections_to_unprotect)
 				{
 					if (tex->is_locked())
@@ -1162,46 +1182,11 @@ namespace rsx
 					}
 				}
 
-				//TODO: This bit can cause race conditions if other threads are accessing this memory
-				//1. Force readback if surface is not synchronized yet to make unlocked part finish quickly
-				for (auto &tex : data.sections_to_flush)
-				{
-					if (tex->is_locked())
-					{
-						if (!tex->is_synchronized())
-						{
-							record_cache_miss(*tex);
-							tex->copy_texture(true, std::forward<Args>(extras)...);
-						}
-
-						m_cache[get_block_address(tex->get_section_base())].remove_one();
-					}
-				}
-
-				//TODO: Acquire global io lock here
-
-				//2. Unprotect all the memory
+				//3. Release all flushed sections
 				for (auto &tex : data.sections_to_flush)
 				{
 					tex->unprotect();
-				}
-
-				//3. Write all the memory
-				for (auto &tex : data.sections_to_flush)
-				{
-					tex->flush(std::forward<Args>(extras)...);
-					m_num_flush_requests++;
-				}
-
-				//Restore protection on the sections to reprotect
-				int n = 0;
-				for (auto &tex : data.sections_to_reprotect)
-				{
-					if (old_protections[n] != utils::protection::rw)
-					{
-						tex->discard();
-						tex->protect(old_protections[n++]);
-					}
+					m_cache[get_block_address(tex->get_section_base())].remove_one();
 				}
 			}
 			else
@@ -1781,7 +1766,7 @@ namespace rsx
 						for (const auto &surface : overlapping_surfaces)
 						{
 							if (surface->get_context() != rsx::texture_upload_context::blit_engine_dst ||
-								!surface->is_locked())
+								!surface->overlaps(std::make_pair(texaddr, tex_size), rsx::overlap_test_bounds::confirmed_range))
 								continue;
 
 							if (surface->get_width() >= tex_width && surface->get_height() >= tex_height)
@@ -2251,30 +2236,41 @@ namespace rsx
 					dst_is_argb8 ? rsx::texture_create_flags::default_component_order :
 					rsx::texture_create_flags::swapped_native_component_order;
 
-				dest_texture = create_new_texture(cmd, dst.rsx_address, dst.pitch * dst_dimensions.height,
+				//NOTE: Should upload from cpu instead of creating a blank texture
+				cached_dest = create_new_texture(cmd, dst.rsx_address, dst.pitch * dst_dimensions.height,
 					dst_dimensions.width, dst_dimensions.height, 1, 1,
 					gcm_format, rsx::texture_upload_context::blit_engine_dst, rsx::texture_dimension_extended::texture_dimension_2d,
-					channel_order, rsx::texture_colorspace::rgb_linear, rsx::default_remap_vector)->get_raw_texture();
+					channel_order, rsx::texture_colorspace::rgb_linear, rsx::default_remap_vector);
 
+				dest_texture = cached_dest->get_raw_texture();
 				m_texture_memory_in_use += dst.pitch * dst_dimensions.height;
 			}
-			else if (cached_dest)
-			{
-				if (!cached_dest->is_locked())
-				{
-					lock.upgrade();
 
-					cached_dest->reprotect(utils::protection::no);
+			if (cached_dest)
+			{
+				const bool notify = !cached_dest->is_locked();
+				const u32 mem_base = dst_area.y1 * dst.pitch;
+				const u32 mem_length = dst.pitch * dst.clip_height;
+
+				lock.upgrade();
+
+				if (notify)
+				{
 					m_cache[get_block_address(cached_dest->get_section_base())].notify();
 				}
 				else if (cached_dest->is_synchronized())
 				{
-					//Prematurely read back
+					// Premature readback
 					m_num_cache_mispredictions++;
 				}
 
+				cached_dest->reprotect(utils::protection::no, { mem_base, mem_length });
 				cached_dest->touch();
 			}
+			else
+			{
+				verify(HERE), dst_is_render_target;
+			}
 
 			if (rsx::get_resolution_scale_percent() != 100)
 			{
@@ -2345,9 +2341,17 @@ namespace rsx
 						auto& section = find_cached_texture(It.first, It.second);
 						if (section.get_protection() != utils::protection::no)
 						{
-							//NOTE: find_cached_texture will increment block ctr
-							section.reprotect(utils::protection::no);
-							update_tag = true;
+							if (section.exists())
+							{
+								//NOTE: find_cached_texture will increment block ctr
+								section.reprotect(utils::protection::no);
+								update_tag = true;
+							}
+							else
+							{
+								//This should never happen
+								LOG_ERROR(RSX, "Reprotection attempted on destroyed framebuffer section @ 0x%x+0x%x", It.first, It.second);
+							}
 						}
 					}
 
@@ -2406,58 +2410,14 @@ namespace rsx
 
 		void tag_framebuffer(u32 texaddr)
 		{
-			if (!g_cfg.video.strict_rendering_mode)
-				return;
-
-			writer_lock lock(m_cache_mutex);
-
-			const auto protect_info = get_memory_protection(texaddr);
-			if (protect_info.first != utils::protection::rw)
-			{
-				if (protect_info.second->overlaps(texaddr, true))
-				{
-					if (protect_info.first == utils::protection::no)
-						return;
-
-					if (protect_info.second->get_context() != texture_upload_context::blit_engine_dst)
-					{
-						//TODO: Invalidate this section
-						LOG_TRACE(RSX, "Framebuffer memory occupied by regular texture!");
-					}
-				}
-
-				protect_info.second->unprotect();
-				vm::write32(texaddr, texaddr);
-				protect_info.second->protect(protect_info.first);
-				return;
-			}
-
-			vm::write32(texaddr, texaddr);
+			auto ptr = rsx::get_super_ptr(texaddr, 4).get<u32>();
+			*ptr = texaddr;
 		}
 
 		bool test_framebuffer(u32 texaddr)
 		{
-			if (!g_cfg.video.strict_rendering_mode)
-				return true;
-
-			if (g_cfg.video.write_color_buffers || g_cfg.video.write_depth_buffer)
-			{
-				writer_lock lock(m_cache_mutex);
-				auto protect_info = get_memory_protection(texaddr);
-				if (protect_info.first == utils::protection::no)
-				{
-					if (protect_info.second->overlaps(texaddr, true))
-						return true;
-
-					//Address isnt actually covered by the region, it only shares a page with it
-					protect_info.second->unprotect();
-					bool result = (vm::read32(texaddr) == texaddr);
-					protect_info.second->protect(utils::protection::no);
-					return result;
-				}
-			}
-
-			return vm::read32(texaddr) == texaddr;
+			auto ptr = rsx::get_super_ptr(texaddr, 4).get<u32>();
+			return *ptr == texaddr;
 		}
 	};
 }
diff --git a/rpcs3/Emu/RSX/GL/GLTextureCache.h b/rpcs3/Emu/RSX/GL/GLTextureCache.h
index 96a75e34f8..3579cd634d 100644
--- a/rpcs3/Emu/RSX/GL/GLTextureCache.h
+++ b/rpcs3/Emu/RSX/GL/GLTextureCache.h
@@ -477,12 +477,14 @@ namespace gl
 			m_fence.wait_for_signal();
 			flushed = true;
 
+			const auto valid_range = get_confirmed_range();
+			void *dst = get_raw_ptr(valid_range.first);
+
 			glBindBuffer(GL_PIXEL_PACK_BUFFER, pbo_id);
-			void *data = glMapBufferRange(GL_PIXEL_PACK_BUFFER, 0, pbo_size, GL_MAP_READ_BIT);
-			u8 *dst = vm::_ptr<u8>(cpu_address_base);
+			void *src = glMapBufferRange(GL_PIXEL_PACK_BUFFER, valid_range.first, valid_range.second, GL_MAP_READ_BIT);
 
 			//throw if map failed since we'll segfault anyway
-			verify(HERE), data != nullptr;
+			verify(HERE), src != nullptr;
 
 			bool require_manual_shuffle = false;
 			if (pack_unpack_swap_bytes)
@@ -493,20 +495,17 @@ namespace gl
 
 			if (real_pitch >= rsx_pitch || scaled_texture != 0)
 			{
-				memcpy(dst, data, cpu_address_range);
+				memcpy(dst, src, valid_range.second);
 			}
 			else
 			{
-				const u8 pixel_size = get_pixel_size(format, type);
-				const u8 samples_u = (aa_mode == rsx::surface_antialiasing::center_1_sample) ? 1 : 2;
-				const u8 samples_v = (aa_mode == rsx::surface_antialiasing::square_centered_4_samples || aa_mode == rsx::surface_antialiasing::square_rotated_4_samples) ? 2 : 1;
-				rsx::scale_image_nearest(dst, const_cast<const void*>(data), width, height, rsx_pitch, real_pitch, pixel_size, samples_u, samples_v);
+				fmt::throw_exception("Unreachable");
 			}
 
 			if (require_manual_shuffle)
 			{
 				//byte swapping does not work on byte types, use uint_8_8_8_8 for rgba8 instead to avoid penalty
-				rsx::shuffle_texel_data_wzyx<u8>(dst, rsx_pitch, width, height);
+				rsx::shuffle_texel_data_wzyx<u8>(dst, rsx_pitch, width, valid_range.second / rsx_pitch);
 			}
 			else if (pack_unpack_swap_bytes && ::gl::get_driver_caps().vendor_AMD)
 			{
@@ -522,7 +521,7 @@ namespace gl
 				case texture::type::ushort_1_5_5_5_rev:
 				case texture::type::ushort_5_5_5_1:
 				{
-					const u32 num_reps = cpu_address_range / 2;
+					const u32 num_reps = valid_range.second / 2;
 					be_t<u16>* in = (be_t<u16>*)(dst);
 					u16* out = (u16*)dst;
 
@@ -541,7 +540,7 @@ namespace gl
 				case texture::type::uint_2_10_10_10_rev:
 				case texture::type::uint_8_8_8_8:
 				{
-					u32 num_reps = cpu_address_range / 4;
+					u32 num_reps = valid_range.second / 4;
 					be_t<u32>* in = (be_t<u32>*)(dst);
 					u32* out = (u32*)dst;
 
@@ -568,6 +567,13 @@ namespace gl
 			return result;
 		}
 
+		void reprotect(utils::protection prot, const std::pair<u32, u32>& range)
+		{
+			flushed = false;
+			synchronized = false;
+			protect(prot, range);
+		}
+
 		void reprotect(utils::protection prot)
 		{
 			flushed = false;
@@ -992,9 +998,9 @@ namespace gl
 					fmt::throw_exception("Unexpected gcm format 0x%X" HERE, gcm_format);
 				}
 
+				//NOTE: Protection is handled by the caller
 				cached.make_flushable();
 				cached.set_dimensions(width, height, depth, (rsx_size / height));
-				cached.protect(utils::protection::no);
 				no_access_range = cached.get_min_max(no_access_range);
 			}
 
@@ -1141,7 +1147,7 @@ namespace gl
 				if (tex.is_dirty())
 					continue;
 
-				if (!tex.overlaps(rsx_address, true))
+				if (!tex.overlaps(rsx_address, rsx::overlap_test_bounds::full_range))
 					continue;
 
 				if ((rsx_address + rsx_size - tex.get_section_base()) <= tex.get_section_size())
diff --git a/rpcs3/Emu/RSX/Overlays/overlays.cpp b/rpcs3/Emu/RSX/Overlays/overlays.cpp
index a0470929c2..faeeae381e 100644
--- a/rpcs3/Emu/RSX/Overlays/overlays.cpp
+++ b/rpcs3/Emu/RSX/Overlays/overlays.cpp
@@ -22,7 +22,7 @@ namespace rsx
 
 		void user_interface::refresh()
 		{
-			if (auto rsxthr = fxm::get<GSRender>())
+			if (auto rsxthr = rsx::get_current_renderer())
 			{
 				rsxthr->native_ui_flip_request.store(true);
 			}
diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp
index 3ba49bebed..f0804f98d6 100644
--- a/rpcs3/Emu/RSX/RSXThread.cpp
+++ b/rpcs3/Emu/RSX/RSXThread.cpp
@@ -30,6 +30,7 @@ rsx::frame_capture_data frame_capture;
 namespace rsx
 {
 	std::function<bool(u32 addr, bool is_writing)> g_access_violation_handler;
+	thread* g_current_renderer = nullptr;
 
 	//TODO: Restore a working shaders cache
 
@@ -239,10 +240,12 @@ namespace rsx
 
 	thread::thread()
 	{
+		g_current_renderer = this;
 		g_access_violation_handler = [this](u32 address, bool is_writing)
 		{
 			return on_access_violation(address, is_writing);
 		};
+
 		m_rtts_dirty = true;
 		memset(m_textures_dirty, -1, sizeof(m_textures_dirty));
 		memset(m_vertex_textures_dirty, -1, sizeof(m_vertex_textures_dirty));
@@ -253,6 +256,7 @@ namespace rsx
 	thread::~thread()
 	{
 		g_access_violation_handler = nullptr;
+		g_current_renderer = nullptr;
 	}
 
 	void thread::capture_frame(const std::string &name)
diff --git a/rpcs3/Emu/RSX/RSXThread.h b/rpcs3/Emu/RSX/RSXThread.h
index 73a0c7d315..03f027d06a 100644
--- a/rpcs3/Emu/RSX/RSXThread.h
+++ b/rpcs3/Emu/RSX/RSXThread.h
@@ -322,6 +322,9 @@ namespace rsx
 		GcmTileInfo tiles[limits::tiles_count];
 		GcmZcullInfo zculls[limits::zculls_count];
 
+		//super memory map (mapped block with r/w permissions)
+		std::pair<u32, std::shared_ptr<u8>> super_memory_map;
+
 		bool capture_current_frame = false;
 		void capture_frame(const std::string &name);
 
diff --git a/rpcs3/Emu/RSX/VK/VKHelpers.cpp b/rpcs3/Emu/RSX/VK/VKHelpers.cpp
index fa6cab8a32..89959852b8 100644
--- a/rpcs3/Emu/RSX/VK/VKHelpers.cpp
+++ b/rpcs3/Emu/RSX/VK/VKHelpers.cpp
@@ -216,6 +216,13 @@ namespace vk
 	void set_current_renderer(const vk::render_device &device)
 	{
 		g_current_renderer = device;
+		g_cb_no_interrupt_flag.store(false);
+		g_drv_no_primitive_restart_flag = false;
+		g_drv_sanitize_fp_values = false;
+		g_drv_disable_fence_reset = false;
+		g_num_processed_frames = 0;
+		g_num_total_frames = 0;
+
 		const auto gpu_name = g_current_renderer.gpu().name();
 
 		//Radeon fails to properly handle degenerate primitives if primitive restart is enabled
diff --git a/rpcs3/Emu/RSX/VK/VKTextureCache.h b/rpcs3/Emu/RSX/VK/VKTextureCache.h
index d857d50775..bd8d10b0c5 100644
--- a/rpcs3/Emu/RSX/VK/VKTextureCache.h
+++ b/rpcs3/Emu/RSX/VK/VKTextureCache.h
@@ -205,26 +205,20 @@ namespace vk
 		}
 
 		template<typename T, bool swapped>
-		void do_memory_transfer(void *pixels_dst, const void *pixels_src, u32 channels_count)
+		void do_memory_transfer(void *pixels_dst, const void *pixels_src, u32 max_length)
 		{
-			if (sizeof(T) == 1)
-				memcpy(pixels_dst, pixels_src, cpu_address_range);
+			if (sizeof(T) == 1 || !swapped)
+			{
+				memcpy(pixels_dst, pixels_src, max_length);
+			}
 			else
 			{
-				const u32 block_size = width * height * channels_count;
+				const u32 block_size = max_length / sizeof(T);
+				auto typed_dst = (be_t<T> *)pixels_dst;
+				auto typed_src = (T *)pixels_src;
 
-				if (swapped)
-				{
-					auto typed_dst = (be_t<T> *)pixels_dst;
-					auto typed_src = (T *)pixels_src;
-
-					for (u32 px = 0; px < block_size; ++px)
-						typed_dst[px] = typed_src[px];
-				}
-				else
-				{
-					memcpy(pixels_dst, pixels_src, block_size * sizeof(T));
-				}
+				for (u32 px = 0; px < block_size; ++px)
+					typed_dst[px] = typed_src[px];
 			}
 		}
 
@@ -249,12 +243,12 @@ namespace vk
 
 			flushed = true;
 
-			void* pixels_src = dma_buffer->map(0, cpu_address_range);
-			void* pixels_dst = vm::base(cpu_address_base);
+			const auto valid_range = get_confirmed_range();
+			void* pixels_src = dma_buffer->map(valid_range.first, valid_range.second);
+			void* pixels_dst = get_raw_ptr(valid_range.first);
 
 			const auto texel_layout = vk::get_format_element_size(vram_texture->info.format);
 			const auto elem_size = texel_layout.first;
-			const auto channel_count = texel_layout.second;
 
 			//We have to do our own byte swapping since the driver doesnt do it for us
 			if (real_pitch == rsx_pitch)
@@ -263,10 +257,10 @@ namespace vk
 				switch (vram_texture->info.format)
 				{
 				case VK_FORMAT_D32_SFLOAT_S8_UINT:
-					rsx::convert_le_f32_to_be_d24(pixels_dst, pixels_src, cpu_address_range >> 2, 1);
+					rsx::convert_le_f32_to_be_d24(pixels_dst, pixels_src, valid_range.second >> 2, 1);
 					break;
 				case VK_FORMAT_D24_UNORM_S8_UINT:
-					rsx::convert_le_d24x8_to_be_d24x8(pixels_dst, pixels_src, cpu_address_range >> 2, 1);
+					rsx::convert_le_d24x8_to_be_d24x8(pixels_dst, pixels_src, valid_range.second >> 2, 1);
 					break;
 				default:
 					is_depth_format = false;
@@ -280,19 +274,19 @@ namespace vk
 					default:
 						LOG_ERROR(RSX, "Invalid element width %d", elem_size);
 					case 1:
-						do_memory_transfer<u8, false>(pixels_dst, pixels_src, channel_count);
+						do_memory_transfer<u8, false>(pixels_dst, pixels_src, valid_range.second);
 						break;
 					case 2:
 						if (pack_unpack_swap_bytes)
-							do_memory_transfer<u16, true>(pixels_dst, pixels_src, channel_count);
+							do_memory_transfer<u16, true>(pixels_dst, pixels_src, valid_range.second);
 						else
-							do_memory_transfer<u16, false>(pixels_dst, pixels_src, channel_count);
+							do_memory_transfer<u16, false>(pixels_dst, pixels_src, valid_range.second);
 						break;
 					case 4:
 						if (pack_unpack_swap_bytes)
-							do_memory_transfer<u32, true>(pixels_dst, pixels_src, channel_count);
+							do_memory_transfer<u32, true>(pixels_dst, pixels_src, valid_range.second);
 						else
-							do_memory_transfer<u32, false>(pixels_dst, pixels_src, channel_count);
+							do_memory_transfer<u32, false>(pixels_dst, pixels_src, valid_range.second);
 						break;
 					}
 				}
@@ -314,16 +308,17 @@ namespace vk
 					break;
 				}
 
-				u16 row_length = u16(width * channel_count);
-				rsx::scale_image_nearest(pixels_dst, pixels_src, row_length, height, rsx_pitch, real_pitch, elem_size, samples_u, samples_v, pack_unpack_swap_bytes);
+				const u16 row_length = u16(width * texel_layout.second);
+				const u16 usable_height = (valid_range.second / rsx_pitch) / samples_v;
+				rsx::scale_image_nearest(pixels_dst, pixels_src, row_length, usable_height, rsx_pitch, real_pitch, elem_size, samples_u, samples_v, pack_unpack_swap_bytes);
 
 				switch (vram_texture->info.format)
 				{
 				case VK_FORMAT_D32_SFLOAT_S8_UINT:
-					rsx::convert_le_f32_to_be_d24(pixels_dst, pixels_dst, cpu_address_range >> 2, 1);
+					rsx::convert_le_f32_to_be_d24(pixels_dst, pixels_dst, valid_range.second >> 2, 1);
 					break;
 				case VK_FORMAT_D24_UNORM_S8_UINT:
-					rsx::convert_le_d24x8_to_be_d24x8(pixels_dst, pixels_dst, cpu_address_range >> 2, 1);
+					rsx::convert_le_d24x8_to_be_d24x8(pixels_dst, pixels_dst, valid_range.second >> 2, 1);
 					break;
 				}
 			}
@@ -340,6 +335,16 @@ namespace vk
 			pack_unpack_swap_bytes = swap_bytes;
 		}
 
+		void reprotect(utils::protection prot, const std::pair<u32, u32>& range) //Ranged variant: 'range' is forwarded unchanged to protect(prot, range)
+		{
+			//Clear the flushed/synchronized state and sync timestamp, then protect again
+			flushed = false;
+			synchronized = false;
+			sync_timestamp = 0ull;
+
+			protect(prot, range);
+		}
+
 		void reprotect(utils::protection prot)
 		{
 			//Reset properties and protect again
@@ -896,7 +901,7 @@ namespace vk
 			else
 			{
 				//TODO: Confirm byte swap patterns
-				region.protect(utils::protection::no);
+				//NOTE: Protection is handled by the caller
 				region.set_unpack_swap_bytes((aspect_flags & VK_IMAGE_ASPECT_COLOR_BIT) == VK_IMAGE_ASPECT_COLOR_BIT);
 				no_access_range = region.get_min_max(no_access_range);
 			}
@@ -1077,7 +1082,7 @@ namespace vk
 				if (tex.is_dirty())
 					continue;
 
-				if (!tex.overlaps(rsx_address, true))
+				if (!tex.overlaps(rsx_address, rsx::overlap_test_bounds::full_range))
 					continue;
 
 				if ((rsx_address + rsx_size - tex.get_section_base()) <= tex.get_section_size())
diff --git a/rpcs3/Emu/RSX/rsx_cache.h b/rpcs3/Emu/RSX/rsx_cache.h
index b120d1137f..d045d01fc4 100644
--- a/rpcs3/Emu/RSX/rsx_cache.h
+++ b/rpcs3/Emu/RSX/rsx_cache.h
@@ -7,13 +7,22 @@
 #include "Emu/Cell/Modules/cellMsgDialog.h"
 #include "Emu/System.h"
 
+#include "rsx_utils.h"
+
 namespace rsx
 {
 	enum protection_policy
 	{
-		protect_policy_one_page,	//Only guard one page, preferably one where this section 'wholly' fits
+		protect_policy_one_page,     //Only guard one page, preferably one where this section 'wholly' fits
 		protect_policy_conservative, //Guards as much memory as possible that is guaranteed to only be covered by the defined range without sharing
-		protect_policy_full_range	//Guard the full memory range. Shared pages may be invalidated by access outside the object we're guarding
+		protect_policy_full_range    //Guard the full memory range. Shared pages may be invalidated by access outside the object we're guarding
+	};
+
+	enum overlap_test_bounds //Selects which range of a buffered_section an overlap test is performed against
+	{
+		full_range,      //The section's cpu range (cpu_address_base, cpu_address_range)
+		protected_range, //The locked range (locked_address_base, locked_address_range)
+		confirmed_range  //The range reported by buffered_section::get_confirmed_range()
 	};
 
 	class buffered_section
@@ -21,6 +30,21 @@ namespace rsx
 	private:
 		u32 locked_address_base = 0;
 		u32 locked_address_range = 0;
+		weak_ptr locked_memory_ptr;
+		std::pair<u32, u32> confirmed_range;
+
+		inline void tag_memory() //Writes marker values through locked_memory_ptr; does nothing when the pointer is null
+		{
+			if (locked_memory_ptr)
+			{
+				const u32 valid_limit = (confirmed_range.second) ? confirmed_range.first + confirmed_range.second : cpu_address_range; //End offset: confirmed range if set, else the whole section
+				u32* first = locked_memory_ptr.get<u32>(confirmed_range.first);
+				u32* last = locked_memory_ptr.get<u32>(valid_limit - 4); //NOTE(review): assumes the range is at least 4 bytes long - confirm
+				//Each marker word stores its own cpu address; test_memory_head/test_memory_tail compare against the same values
+				*first = cpu_address_base + confirmed_range.first;
+				*last = cpu_address_base + valid_limit - 4;
+			}
+		}
 
 	protected:
 		u32 cpu_address_base = 0;
@@ -37,21 +61,11 @@ namespace rsx
 			return (base1 < limit2 && base2 < limit1);
 		}
 
-	public:
-
-		buffered_section() {}
-		~buffered_section() {}
-
-		void reset(u32 base, u32 length, protection_policy protect_policy = protect_policy_full_range)
+		inline void init_lockable_range(u32 base, u32 length)
 		{
-			verify(HERE), locked == false;
-
-			cpu_address_base = base;
-			cpu_address_range = length;
-
 			locked_address_base = (base & ~4095);
 
-			if ((protect_policy != protect_policy_full_range) && (length >= 4096))
+			if ((guard_policy != protect_policy_full_range) && (length >= 4096))
 			{
 				const u32 limit = base + length;
 				const u32 block_end = (limit & ~4095);
@@ -64,7 +78,7 @@ namespace rsx
 					//Page boundaries cover at least one unique page
 					locked_address_base = block_start;
 
-					if (protect_policy == protect_policy_conservative)
+					if (guard_policy == protect_policy_conservative)
 					{
 						//Protect full unique range
 						locked_address_range = (block_end - block_start);
@@ -75,24 +89,83 @@ namespace rsx
 				locked_address_range = align(base + length, 4096) - locked_address_base;
 
 			verify(HERE), locked_address_range > 0;
+		}
+
+	public:
+
+		buffered_section() {}
+		~buffered_section() {}
+
+		void reset(u32 base, u32 length, protection_policy protect_policy = protect_policy_full_range) //Rebinds this section to [base, base+length); must not be called while locked
+		{
+			verify(HERE), locked == false;
+
+			cpu_address_base = base;
+			cpu_address_range = length;
+
+			confirmed_range = { 0, 0 }; //A zero length means 'no confirmed range'; get_confirmed_range() then reports the whole section
 			protection = utils::protection::rw;
 			guard_policy = protect_policy;
 			locked = false;
+			//guard_policy has to be assigned before this call since init_lockable_range reads it
+			init_lockable_range(cpu_address_base, cpu_address_range);
 		}
 
 		void protect(utils::protection prot)
 		{
 			if (prot == protection) return;
+
 			verify(HERE), locked_address_range > 0;
 			utils::memory_protect(vm::base(locked_address_base), locked_address_range, prot);
 			protection = prot;
 			locked = prot != utils::protection::rw;
+			//Only no-access sections keep a mirror pointer; it is based at cpu_address_base, so offsets into it are section-relative
+			if (prot == utils::protection::no)
+			{
+				locked_memory_ptr = rsx::get_super_ptr(cpu_address_base, cpu_address_range);
+				tag_memory(); //Stamp the head/tail markers through the mirror
+			}
+			else
+			{
+				if (!locked)
+				{
+					//Unprotect range also invalidates secured range
+					confirmed_range = { 0, 0 };
+				}
+
+				locked_memory_ptr = {}; //Drop the mirror for every protection other than 'no'
+			}
+		}
+
+		void protect(utils::protection prot, const std::pair<u32, u32>& range_confirm) //range_confirm is <offset from cpu_address_base, length>
+		{
+			if (prot != utils::protection::rw)
+			{
+				//Grow the confirmed range to include range_confirm and rebuild the lockable range from the result.
+				//The cached protection state is cleared first so that protect(prot) below always runs
+				//instead of returning early when prot equals the current protection.
+				protection = utils::protection::rw;
+
+				if (confirmed_range.second)
+				{
+					const u32 range_limit = std::max(range_confirm.first + range_confirm.second, confirmed_range.first + confirmed_range.second);
+					confirmed_range.first = std::min(confirmed_range.first, range_confirm.first);
+					confirmed_range.second = range_limit - confirmed_range.first;
+				}
+				else
+				{
+					confirmed_range = range_confirm;
+				}
+
+				init_lockable_range(confirmed_range.first + cpu_address_base, confirmed_range.second); //NOTE(review): pages locked under the previous range are not unprotected here - confirm
+			}
+
+			protect(prot);
+		}
 
 		void unprotect()
 		{
 			protect(utils::protection::rw);
-			locked = false;
 		}
 
 		void discard()
@@ -112,27 +185,55 @@ namespace rsx
 			return region_overlaps(locked_address_base, locked_address_base + locked_address_range, range.first, range.first + range.second);
 		}
 
-		bool overlaps(u32 address, bool ignore_protection_range) const
+		bool overlaps(u32 address, overlap_test_bounds bounds) const //True when 'address' lies inside the range selected by 'bounds'
 		{
-			if (!ignore_protection_range)
-				return (locked_address_base <= address && (address - locked_address_base) < locked_address_range);
-			else
+			switch (bounds)
+			{
+			case overlap_test_bounds::full_range:
+			{
 				return (cpu_address_base <= address && (address - cpu_address_base) < cpu_address_range);
+			}
+			case overlap_test_bounds::protected_range:
+			{
+				return (locked_address_base <= address && (address - locked_address_base) < locked_address_range);
+			}
+			case overlap_test_bounds::confirmed_range:
+			{
+				const auto range = get_confirmed_range(); //<offset from cpu_address_base, length>
+				return ((range.first + cpu_address_base) <= address && (address - (range.first + cpu_address_base)) < range.second);
+			}
+			default:
+				fmt::throw_exception("Unreachable" HERE);
+			}
 		}
 
-		bool overlaps(std::pair<u32, u32> range, bool ignore_protection_range) const
+		bool overlaps(const std::pair<u32, u32>& range, overlap_test_bounds bounds) const //'range' is <base address, length>; tested against the range selected by 'bounds'
 		{
-			if (!ignore_protection_range)
-				return region_overlaps(locked_address_base, locked_address_base + locked_address_range, range.first, range.first + range.second);
-			else
+			switch (bounds)
+			{
+			case overlap_test_bounds::full_range:
+			{
 				return region_overlaps(cpu_address_base, cpu_address_base + cpu_address_range, range.first, range.first + range.second);
+			}
+			case overlap_test_bounds::protected_range:
+			{
+				return region_overlaps(locked_address_base, locked_address_base + locked_address_range, range.first, range.first + range.second);
+			}
+			case overlap_test_bounds::confirmed_range:
+			{
+				const auto test_range = get_confirmed_range(); //Offset is relative to cpu_address_base, hence the rebasing below
+				return region_overlaps(test_range.first + cpu_address_base, test_range.first + cpu_address_base + test_range.second, range.first, range.first + range.second);
+			}
+			default:
+				fmt::throw_exception("Unreachable" HERE);
+			}
 		}
 
 		/**
 		 * Check if the page containing the address tramples this section. Also compares a former trampled page range to compare
 		 * If true, returns the range <min, max> with updated invalid range
 		 */
-		std::tuple<bool, std::pair<u32, u32>> overlaps_page(std::pair<u32, u32> old_range, u32 address, bool full_range_check) const
+		std::tuple<bool, std::pair<u32, u32>> overlaps_page(const std::pair<u32, u32>& old_range, u32 address, overlap_test_bounds bounds) const
 		{
 			const u32 page_base = address & ~4095;
 			const u32 page_limit = address + 4096;
@@ -141,16 +242,29 @@ namespace rsx
 			const u32 compare_max = std::max(old_range.second, page_limit);
 
 			u32 memory_base, memory_range;
-			if (full_range_check && guard_policy != protection_policy::protect_policy_full_range)
+			switch (bounds)
+			{
+			case overlap_test_bounds::full_range:
 			{
-				//Make sure protection range is full range
 				memory_base = (cpu_address_base & ~4095);
 				memory_range = align(cpu_address_base + cpu_address_range, 4096u) - memory_base;
+				break;
 			}
-			else
+			case overlap_test_bounds::protected_range:
 			{
 				memory_base = locked_address_base;
 				memory_range = locked_address_range;
+				break;
+			}
+			case overlap_test_bounds::confirmed_range:
+			{
+				const auto range = get_confirmed_range();
+				memory_base = (cpu_address_base + range.first) & ~4095;
+				memory_range = align(cpu_address_base + range.first + range.second, 4096u) - memory_base;
+				break;
+			}
+			default:
+				fmt::throw_exception("Unreachable" HERE);
 			}
 
 			if (!region_overlaps(memory_base, memory_base + memory_range, compare_min, compare_max))
@@ -191,7 +305,7 @@ namespace rsx
 			return (cpu_address_base == cpu_address && cpu_address_range == size);
 		}
 
-		std::pair<u32, u32> get_min_max(std::pair<u32, u32> current_min_max) const
+		std::pair<u32, u32> get_min_max(const std::pair<u32, u32>& current_min_max) const
 		{
 			u32 min = std::min(current_min_max.first, locked_address_base);
 			u32 max = std::max(current_min_max.second, locked_address_base + locked_address_range);
@@ -203,6 +317,46 @@ namespace rsx
 		{
 			return protection;
 		}
+
+		template <typename T = void>
+		T* get_raw_ptr(u32 offset = 0) const //Pointer into locked_memory_ptr at byte 'offset', cast to T*
+		{
+			verify(HERE), locked_memory_ptr; //The pointer is only assigned by protect() with protection::no
+			return locked_memory_ptr.get<T>(offset);
+		}
+
+		//Checks that the first u32 of the tagged range still holds its own cpu address; false when no mirror pointer is held
+		bool test_memory_head() const
+		{
+			if (locked_memory_ptr)
+			{
+				const u32 head_offset = confirmed_range.first;
+				return (*locked_memory_ptr.get<u32>(head_offset) == (cpu_address_base + head_offset));
+			}
+			return false;
+		}
+
+		//Checks that the last u32 of the tagged range still holds its own cpu address; false when no mirror pointer is held
+		bool test_memory_tail() const
+		{
+			if (locked_memory_ptr)
+			{
+				const u32 range_end = (confirmed_range.second != 0) ? (confirmed_range.first + confirmed_range.second) : cpu_address_range;
+				const u32 tail_offset = range_end - 4;
+				return (*locked_memory_ptr.get<u32>(tail_offset) == (cpu_address_base + tail_offset));
+			}
+			return false;
+		}
+
+		//Confirmed range as <offset, length>; falls back to <0, cpu_address_range> while no range has been confirmed
+		std::pair<u32, u32> get_confirmed_range() const
+		{
+			if (confirmed_range.second != 0)
+			{
+				return confirmed_range;
+			}
+			return { 0, cpu_address_range };
+		}
 	};
 
 	template <typename pipeline_storage_type, typename backend_storage>
diff --git a/rpcs3/Emu/RSX/rsx_utils.cpp b/rpcs3/Emu/RSX/rsx_utils.cpp
index b1bc398293..e18898723e 100644
--- a/rpcs3/Emu/RSX/rsx_utils.cpp
+++ b/rpcs3/Emu/RSX/rsx_utils.cpp
@@ -1,6 +1,7 @@
 #include "stdafx.h"
 #include "rsx_utils.h"
 #include "rsx_methods.h"
+#include "RSXThread.h"
 #include "Emu/RSX/GCM.h"
 #include "Common/BufferUtils.h"
 #include "Overlays/overlays.h"
@@ -75,6 +76,49 @@ namespace rsx
 		}
 	}
 
+	weak_ptr get_super_ptr(u32 addr, u32 len) //Returns a pointer wrapper for [addr, addr+len); requires g_current_renderer to be set
+	{
+		verify(HERE), g_current_renderer;
+		//First use: cache the size and a mirror of the vm block found at 0xC0000000 on the renderer
+		if (!g_current_renderer->super_memory_map.first)
+		{
+			auto block = vm::get(vm::any, 0xC0000000);
+			if (block)
+			{
+				g_current_renderer->super_memory_map.first = block->used();
+				g_current_renderer->super_memory_map.second = vm::get_super_ptr<u8>(0xC0000000, g_current_renderer->super_memory_map.first - 1);
+
+				if (!g_current_renderer->super_memory_map.second)
+				{
+					//Disjoint allocation?
+					LOG_ERROR(RSX, "Could not initialize contiguous RSX super-memory");
+				}
+			}
+			else
+			{
+				fmt::throw_exception("RSX memory not mapped!");
+			}
+		}
+		//Requests that fit entirely inside the cached block are answered with an offset into the cached mirror
+		if (g_current_renderer->super_memory_map.second)
+		{
+			if (addr >= 0xC0000000 && (addr + len) <= (0xC0000000 + g_current_renderer->super_memory_map.first))
+			{
+				//RSX local
+				return { g_current_renderer->super_memory_map.second.get() + (addr - 0xC0000000) };
+			}
+		}
+		//Everything else asks vm::get_super_ptr for this specific range; a failure is logged, not thrown
+		auto result = vm::get_super_ptr<u8>(addr, len - 1); //NOTE(review): len == 0 would wrap here - confirm callers never pass 0
+		if (!result)
+		{
+			//Probably allocated as split blocks??
+			LOG_ERROR(RSX, "Could not get super_ptr for memory block 0x%x+0x%x", addr, len);
+		}
+
+		return { result };
+	}
+
 	/* Fast image scaling routines
 	* Only uses fast nearest scaling and integral scaling factors
 	* T - Dst type
diff --git a/rpcs3/Emu/RSX/rsx_utils.h b/rpcs3/Emu/RSX/rsx_utils.h
index 7af460b4b3..c52ea6d6a4 100644
--- a/rpcs3/Emu/RSX/rsx_utils.h
+++ b/rpcs3/Emu/RSX/rsx_utils.h
@@ -4,6 +4,7 @@
 #include "Utilities/geometry.h"
 #include "gcm_enums.h"
 #include <atomic>
+#include <memory>
 
 // TODO: replace the code below by #include <optional> when C++17 or newer will be used
 #include <optional.hpp>
@@ -20,6 +21,9 @@ extern "C"
 
 namespace rsx
 {
+	class thread;
+	extern thread* g_current_renderer;
+
 	//Base for resources with reference counting
 	struct ref_counted
 	{
@@ -28,6 +32,43 @@ namespace rsx
 		void reset_refs() { deref_count = 0; }
 	};
 
+	//Weak pointer without lock semantics
+	//Backed by a real shared_ptr for non-rsx memory
+	//Backed by a global shared pool for rsx memory
+	struct weak_ptr
+	{
+		void* _ptr;                  //Address handed out by get(); nullptr means 'empty'
+		std::shared_ptr<u8> _extern; //Set only by the shared_ptr constructor or when is_rsx_mem is false
+
+		weak_ptr(void* raw, bool is_rsx_mem = true)
+		{
+			_ptr = raw;
+			if (!is_rsx_mem) _extern.reset((u8*)raw); //NOTE(review): shared_ptr takes ownership and will delete 'raw' - confirm it is heap-allocated
+		}
+
+		weak_ptr(std::shared_ptr<u8>& block) //Shares ownership of 'block' and points at its start
+		{
+			_extern = block;
+			_ptr = _extern.get();
+		}
+
+		weak_ptr()
+		{
+			_ptr = nullptr;
+		}
+
+		template <typename T = void>
+		T* get(u32 offset = 0) const //'offset' is in bytes; no null or bounds checking is performed
+		{
+			return (T*)((u8*)_ptr + offset);
+		}
+
+		operator bool() const //True when a non-null address is held
+		{
+			return (_ptr != nullptr);
+		}
+	};
+
 	//Holds information about a framebuffer
 	struct gcm_framebuffer_info
 	{
@@ -289,6 +330,9 @@ namespace rsx
 
 	std::array<float, 4> get_constant_blend_colors();
 
+	// Acquire memory mirror with r/w permissions
+	weak_ptr get_super_ptr(u32 addr, u32 size);
+
 	/**
 	 * Shuffle texel layout from xyzw to wzyx
 	 * TODO: Variable src/dst and optional se conversion
@@ -498,4 +542,9 @@ namespace rsx
 		result.a = ((colorref >> 24) & 0xFF) / 255.f;
 		return result;
 	}
+
+	static inline thread* get_current_renderer() //Plain accessor for the global g_current_renderer pointer
+	{
+		return g_current_renderer; //Callers in this patch null-check the result before use
+	}
 }
diff --git a/rpcs3/emucore.vcxproj b/rpcs3/emucore.vcxproj
index e1156e9bfe..5b6ca8b473 100644
--- a/rpcs3/emucore.vcxproj
+++ b/rpcs3/emucore.vcxproj
@@ -424,7 +424,7 @@
     <ClInclude Include="Emu\Cell\lv2\sys_ppu_thread.h" />
     <ClInclude Include="Emu\Cell\lv2\sys_process.h" />
     <ClInclude Include="Emu\Cell\lv2\sys_prx.h" />
-    <ClInclude Include="Emu\Cell\lv2\sys_rsx.h" />
+    <ClInclude Include="Emu\Cell\lv2\sys_rsx.h" />
     <ClInclude Include="Emu\Cell\lv2\sys_rwlock.h" />
     <ClInclude Include="Emu\Cell\lv2\sys_semaphore.h" />
     <ClInclude Include="Emu\Cell\lv2\sys_spu.h" />
diff --git a/rpcs3/rpcs3qt/rsx_debugger.cpp b/rpcs3/rpcs3qt/rsx_debugger.cpp
index b4298a5aa7..e22aae163b 100644
--- a/rpcs3/rpcs3qt/rsx_debugger.cpp
+++ b/rpcs3/rpcs3qt/rsx_debugger.cpp
@@ -213,7 +213,7 @@ rsx_debugger::rsx_debugger(std::shared_ptr<gui_settings> gui_settings, QWidget*
 	//Events
 	connect(b_goto_get, &QAbstractButton::clicked, [=]
 	{
-		if (const auto render = fxm::get<GSRender>())
+		if (const auto render = rsx::get_current_renderer())
 		{
 			u32 realAddr;
 			if (RSXIOMem.getRealAddr(render->ctrl->get.load(), realAddr))
@@ -225,7 +225,7 @@ rsx_debugger::rsx_debugger(std::shared_ptr<gui_settings> gui_settings, QWidget*
 	});
 	connect(b_goto_put, &QAbstractButton::clicked, [=]
 	{
-		if (const auto render = fxm::get<GSRender>())
+		if (const auto render = rsx::get_current_renderer())
 		{
 			u32 realAddr;
 			if (RSXIOMem.getRealAddr(render->ctrl->put.load(), realAddr))
@@ -377,7 +377,7 @@ void Buffer::showImage(const QImage& image)
 
 void Buffer::ShowWindowed()
 {
-	const auto render = fxm::get<GSRender>();
+	const auto render = rsx::get_current_renderer();
 	if (!render)
 		return;
 
@@ -667,7 +667,7 @@ void rsx_debugger::GetMemory()
 
 void rsx_debugger::GetBuffers()
 {
-	const auto render = fxm::get<GSRender>();
+	const auto render = rsx::get_current_renderer();
 	if (!render)
 	{
 		return;
@@ -744,7 +744,7 @@ void rsx_debugger::GetBuffers()
 
 void rsx_debugger::GetFlags()
 {
-	const auto render = fxm::get<GSRender>();
+	const auto render = rsx::get_current_renderer();
 	if (!render)
 	{
 		return;
@@ -781,7 +781,7 @@ void rsx_debugger::GetFlags()
 
 void rsx_debugger::GetLightning()
 {
-	const auto render = fxm::get<GSRender>();
+	const auto render = rsx::get_current_renderer();
 	if (!render)
 	{
 		return;
@@ -800,7 +800,7 @@ void rsx_debugger::GetLightning()
 
 void rsx_debugger::GetTexture()
 {
-	const auto render = fxm::get<GSRender>();
+	const auto render = rsx::get_current_renderer();
 	if (!render)
 	{
 		return;
@@ -843,7 +843,7 @@ void rsx_debugger::GetTexture()
 
 void rsx_debugger::GetSettings()
 {
-	const auto render = fxm::get<GSRender>();
+	const auto render = rsx::get_current_renderer();
 	if (!render)
 	{
 		return;
@@ -942,7 +942,7 @@ void rsx_debugger::SetFlags()
 
 void rsx_debugger::SetPrograms()
 {
-	const auto render = fxm::get<GSRender>();
+	const auto render = rsx::get_current_renderer();
 	if (!render)
 	{
 		return;