From b067688c8eeafe39f3ea2e8d275dc4edb65f3dfb Mon Sep 17 00:00:00 2001 From: kd-11 Date: Thu, 20 Nov 2025 01:45:03 +0300 Subject: [PATCH 1/7] rsx: Drop meaningless log message - This used to be a big deal a decade ago, now it's just wasting CPU cycles and filling up log files --- rpcs3/Emu/RSX/Common/texture_cache_utils.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rpcs3/Emu/RSX/Common/texture_cache_utils.h b/rpcs3/Emu/RSX/Common/texture_cache_utils.h index 8c5defdd0b..c34b73f7ba 100644 --- a/rpcs3/Emu/RSX/Common/texture_cache_utils.h +++ b/rpcs3/Emu/RSX/Common/texture_cache_utils.h @@ -1496,7 +1496,7 @@ namespace rsx void on_miss() { - rsx_log.warning("Cache miss at address 0x%X. This is gonna hurt...", get_section_base()); + // rsx_log.trace("Cache miss at address 0x%X. This is gonna hurt...", get_section_base()); m_tex_cache->on_miss(*derived()); } From ff72f944bade6f9262f3dd674681a76fbfbadb54 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Thu, 20 Nov 2025 01:45:50 +0300 Subject: [PATCH 2/7] rsx/vk: Add support for 8 and 16-bit texel GPU-accelerated deswizzle --- rpcs3/Emu/RSX/Common/TextureUtils.cpp | 125 ++++++++---------- rpcs3/Emu/RSX/Common/surface_store.h | 2 +- .../Program/GLSLSnippets/GPUDeswizzle.glsl | 73 +++++++++- rpcs3/Emu/RSX/VK/VKCompute.h | 24 +++- rpcs3/Emu/RSX/VK/VKTexture.cpp | 24 +++- 5 files changed, 158 insertions(+), 90 deletions(-) diff --git a/rpcs3/Emu/RSX/Common/TextureUtils.cpp b/rpcs3/Emu/RSX/Common/TextureUtils.cpp index 382ef9cc06..96f87111ff 100644 --- a/rpcs3/Emu/RSX/Common/TextureUtils.cpp +++ b/rpcs3/Emu/RSX/Common/TextureUtils.cpp @@ -1096,80 +1096,65 @@ namespace rsx fmt::throw_exception("Wrong format 0x%x", format); } - if (word_size) + if (!word_size) { - if (word_size == 1) + return result; + } + + result.element_size = word_size; + result.block_length = words_per_block; + + bool require_cpu_swizzle = !caps.supports_hw_deswizzle && is_swizzled; + bool require_cpu_byteswap = word_size > 1 && 
!caps.supports_byteswap; + + if (is_swizzled && caps.supports_hw_deswizzle) + { + result.require_deswizzle = true; + } + + if (!require_cpu_byteswap && !require_cpu_swizzle) + { + result.require_swap = (word_size > 1); + + if (caps.supports_zero_copy) { - if (is_swizzled) - { - copy_unmodified_block_swizzled::copy_mipmap_level(dst_buffer.as_span(), src_layout.data.as_span(), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block); - } - else if (caps.supports_zero_copy) - { - result.require_upload = true; - result.deferred_cmds = build_transfer_cmds(src_layout.data.data(), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block); - } - else - { - copy_unmodified_block::copy_mipmap_level(dst_buffer.as_span(), src_layout.data.as_span(), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block); - } + result.require_upload = true; + result.deferred_cmds = build_transfer_cmds(src_layout.data.data(), word_size * words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block); } + else if (word_size == 1) + { + copy_unmodified_block::copy_mipmap_level(dst_buffer.as_span(), src_layout.data.as_span(), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block); + } + else if (word_size == 2) + { + copy_unmodified_block::copy_mipmap_level(dst_buffer.as_span(), src_layout.data.as_span(), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block); + } + else if (word_size == 4) + { + copy_unmodified_block::copy_mipmap_level(dst_buffer.as_span(), src_layout.data.as_span(), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block); + } + + return result; + } + + if (word_size == 1) + { + ensure(is_swizzled); + copy_unmodified_block_swizzled::copy_mipmap_level(dst_buffer.as_span(), src_layout.data.as_span(), words_per_block, w, h, depth, 
src_layout.border, dst_pitch_in_block); + } + else if (word_size == 2) + { + if (is_swizzled) + copy_unmodified_block_swizzled::copy_mipmap_level(dst_buffer.as_span(), src_layout.data.as_span>(), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block); else - { - result.element_size = word_size; - result.block_length = words_per_block; - - bool require_cpu_swizzle = !caps.supports_hw_deswizzle && is_swizzled; - bool require_cpu_byteswap = !caps.supports_byteswap; - - if (is_swizzled && caps.supports_hw_deswizzle) - { - if (word_size == 4 || (((word_size * words_per_block) & 3) == 0)) - { - result.require_deswizzle = true; - } - else - { - require_cpu_swizzle = true; - } - } - - if (!require_cpu_byteswap && !require_cpu_swizzle) - { - result.require_swap = true; - - if (caps.supports_zero_copy) - { - result.require_upload = true; - result.deferred_cmds = build_transfer_cmds(src_layout.data.data(), word_size * words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block); - } - else if (word_size == 2) - { - copy_unmodified_block::copy_mipmap_level(dst_buffer.as_span(), src_layout.data.as_span(), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block); - } - else if (word_size == 4) - { - copy_unmodified_block::copy_mipmap_level(dst_buffer.as_span(), src_layout.data.as_span(), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block); - } - } - else - { - if (word_size == 2) - { - if (is_swizzled) - copy_unmodified_block_swizzled::copy_mipmap_level(dst_buffer.as_span(), src_layout.data.as_span>(), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block); - else - copy_unmodified_block::copy_mipmap_level(dst_buffer.as_span(), src_layout.data.as_span>(), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block); - } - else if (word_size == 4) - { - if (is_swizzled) - 
copy_unmodified_block_swizzled::copy_mipmap_level(dst_buffer.as_span(), src_layout.data.as_span>(), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block); - else - copy_unmodified_block::copy_mipmap_level(dst_buffer.as_span(), src_layout.data.as_span>(), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block); - } - } - } + copy_unmodified_block::copy_mipmap_level(dst_buffer.as_span(), src_layout.data.as_span>(), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block); + } + else if (word_size == 4) + { + if (is_swizzled) + copy_unmodified_block_swizzled::copy_mipmap_level(dst_buffer.as_span(), src_layout.data.as_span>(), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block); + else + copy_unmodified_block::copy_mipmap_level(dst_buffer.as_span(), src_layout.data.as_span>(), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block); } return result; diff --git a/rpcs3/Emu/RSX/Common/surface_store.h b/rpcs3/Emu/RSX/Common/surface_store.h index 5fa595a80b..4476930607 100644 --- a/rpcs3/Emu/RSX/Common/surface_store.h +++ b/rpcs3/Emu/RSX/Common/surface_store.h @@ -1219,7 +1219,7 @@ namespace rsx if (result.size() > 1) { - std::sort(result.begin(), result.end(), [](const auto &a, const auto &b) + result.sort([](const auto &a, const auto &b) { if (a.surface->last_use_tag == b.surface->last_use_tag) { diff --git a/rpcs3/Emu/RSX/Program/GLSLSnippets/GPUDeswizzle.glsl b/rpcs3/Emu/RSX/Program/GLSLSnippets/GPUDeswizzle.glsl index c2d679db6e..17d801c877 100644 --- a/rpcs3/Emu/RSX/Program/GLSLSnippets/GPUDeswizzle.glsl +++ b/rpcs3/Emu/RSX/Program/GLSLSnippets/GPUDeswizzle.glsl @@ -3,6 +3,9 @@ R"( #define SSBO_LOCATION(x) (x + %loc) +#define USE_8BIT_ADDRESSING %_8bit +#define USE_16BIT_ADDRESSING %_16bit + layout(local_size_x = %ws, local_size_y = 1, local_size_z = 1) in; layout(%set, binding=SSBO_LOCATION(0), std430) buffer ssbo0{ 
uint data_in[]; }; @@ -98,12 +101,57 @@ uint get_z_index(const in uint x_, const in uint y_, const in uint z_) return offset; } +#if USE_16BIT_ADDRESSING + +void write16(inout uint accumulator, const in uint subword, const in uint src_id, const in uint dst_id) +{ + const uint masks[] = { 0x0000FFFF, 0xFFFF0000 }; + accumulator |= data_in[src_id / 2] & masks[subword]; + + if (subword == 1) + { + data_out[dst_id / 2] = accumulator; + } +} + +#elif USE_8BIT_ADDRESSING + +void write8(inout uint accumulator, const in uint subword, const in uint src_id, const in uint dst_id) +{ + const uint masks[] = { 0x000000FF, 0x0000FF00, 0x00FF0000, 0xFF000000 }; + accumulator |= data_in[src_id / 4] & masks[subword]; + + if (subword == 3) + { + data_out[dst_id / 4] = accumulator; + } +} + +#else + +void write32(const in uint word_count, in uint src_id, in uint dst_id) +{ + for (uint i = 0; i < word_count; ++i) + { + uint value = data_in[src_id++]; + data_out[dst_id++] = %f(value); + } +} + +#endif + void main() { uint invocations_x = (gl_NumWorkGroups.x * gl_WorkGroupSize.x); uint texel_id = (gl_GlobalInvocationID.y * invocations_x) + gl_GlobalInvocationID.x; uint word_count = %_wordcount; +#if USE_8BIT_ADDRESSING + texel_id *= 4; // Each invocation consumes 4 texels +#elif USE_16BIT_ADDRESSING + texel_id *= 2; // Each invocation consumes 2 texels +#endif + if (!init_invocation_properties(texel_id)) return; @@ -116,14 +164,25 @@ void main() uint y = (slice_offset / row_length); uint x = (slice_offset % row_length); - uint src_texel_id = get_z_index(x, y, z); - uint dst_id = (texel_id * word_count); - uint src_id = (src_texel_id + invocation.data_offset) * word_count; +#if USE_8BIT_ADDRESSING + for (uint subword = 0, accumulator = 0; subword < 4; ++subword, ++x) { +#elif USE_16BIT_ADDRESSING + for (uint subword = 0, temp = 0; subword < 2; ++subword, ++x) { +#endif - for (uint i = 0; i < word_count; ++i) - { - uint value = data_in[src_id++]; - data_out[dst_id++] = %f(value); + uint 
src_texel_id = get_z_index(x, y, z); + uint dst_id = (texel_id * word_count); + uint src_id = (src_texel_id + invocation.data_offset) * word_count; + +#if USE_8BIT_ADDRESSING + write8(accumulator, subword, src_id, dst_id); } +#elif USE_16BIT_ADDRESSING + write16(accumulator, subword, src_id, dst_id); + } +#else + write32(word_count, src_id, dst_id); +#endif + } )" diff --git a/rpcs3/Emu/RSX/VK/VKCompute.h b/rpcs3/Emu/RSX/VK/VKCompute.h index 6e8f37772a..ec5e8d32a6 100644 --- a/rpcs3/Emu/RSX/VK/VKCompute.h +++ b/rpcs3/Emu/RSX/VK/VKCompute.h @@ -403,8 +403,6 @@ namespace vk cs_deswizzle_3d() { - ensure((sizeof(_BlockType) & 3) == 0); // "Unsupported block type" - ssbo_count = 2; use_push_constants = true; push_constants_size = 28; @@ -438,8 +436,10 @@ namespace vk { "%set", "set = 0" }, { "%push_block", "push_constant" }, { "%ws", std::to_string(optimal_group_size) }, - { "%_wordcount", std::to_string(sizeof(_BlockType) / 4) }, - { "%f", transform } + { "%_wordcount", std::to_string(std::max(sizeof(_BlockType) / 4u, 1u)) }, + { "%f", transform }, + { "%_8bit", sizeof(_BlockType) == 1 ? "1" : "0" }, + { "%_16bit", sizeof(_BlockType) == 2 ? 
"1" : "0" }, }; m_src = fmt::replace_all(m_src, syntax_replace); @@ -476,7 +476,21 @@ namespace vk params.logd = rsx::ceil_log2(depth); const u32 num_bytes_per_invocation = (sizeof(_BlockType) * optimal_group_size); - const u32 linear_invocations = utils::aligned_div(data_length, num_bytes_per_invocation); + u32 linear_invocations = utils::aligned_div(data_length, num_bytes_per_invocation); + + // Check if we need to do subaddressing and adjust invocation count accordingly + switch (sizeof(_BlockType)) + { + case 1: + linear_invocations /= 4; + break; + case 2: + linear_invocations /= 2; + break; + default: + break; + } + compute_task::run(cmd, linear_invocations); } }; diff --git a/rpcs3/Emu/RSX/VK/VKTexture.cpp b/rpcs3/Emu/RSX/VK/VKTexture.cpp index 413333d500..a57378384a 100644 --- a/rpcs3/Emu/RSX/VK/VKTexture.cpp +++ b/rpcs3/Emu/RSX/VK/VKTexture.cpp @@ -759,6 +759,10 @@ namespace vk { switch (block_size) { + case 1: + return vk::get_compute_task>(); + case 2: + return vk::get_compute_task>(); case 4: return vk::get_compute_task>(); case 8: @@ -776,21 +780,27 @@ namespace vk vk::cs_deswizzle_base* job = nullptr; const auto block_size = (word_size * word_count); - ensure(word_size == 4 || word_size == 2); - if (!swap_bytes) { - if (word_size == 4) - { - job = get_deswizzle_transformation(block_size); - } - else + switch (word_size) { + case 1: + job = get_deswizzle_transformation(block_size); + break; + case 2: job = get_deswizzle_transformation(block_size); + break; + case 4: + job = get_deswizzle_transformation(block_size); + break; + default: + fmt::throw_exception("Unimplemented deswizzle for format."); } } else { + ensure(word_size == 2 || word_size == 4); + if (word_size == 4) { job = get_deswizzle_transformation(block_size); From cffc13696d9016fdbcddcc9ef8bab7b19d875409 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Thu, 20 Nov 2025 02:04:23 +0300 Subject: [PATCH 3/7] gl: Implement hardware deswizzle for small texel formats --- rpcs3/Emu/RSX/GL/GLCompute.h | 
11 ++++---- rpcs3/Emu/RSX/GL/GLTexture.cpp | 47 ++++++++++++++++++++++++++-------- rpcs3/Emu/RSX/VK/VKCompute.h | 17 ++---------- 3 files changed, 44 insertions(+), 31 deletions(-) diff --git a/rpcs3/Emu/RSX/GL/GLCompute.h b/rpcs3/Emu/RSX/GL/GLCompute.h index 54458c1f1c..442d8e4a0d 100644 --- a/rpcs3/Emu/RSX/GL/GLCompute.h +++ b/rpcs3/Emu/RSX/GL/GLCompute.h @@ -263,8 +263,6 @@ namespace gl cs_deswizzle_3d() { - ensure((sizeof(_BlockType) & 3) == 0); // "Unsupported block type" - initialize(); m_src = @@ -294,8 +292,10 @@ namespace gl { "%loc", std::to_string(GL_COMPUTE_BUFFER_SLOT(0))}, { "%push_block", fmt::format("binding=%d, std140", GL_COMPUTE_BUFFER_SLOT(2)) }, { "%ws", std::to_string(optimal_group_size) }, - { "%_wordcount", std::to_string(sizeof(_BlockType) / 4) }, - { "%f", transform } + { "%_wordcount", std::to_string(std::max(sizeof(_BlockType) / 4u, 1u)) }, + { "%f", transform }, + { "%_8bit", sizeof(_BlockType) == 1 ? "1" : "0" }, + { "%_16bit", sizeof(_BlockType) == 2 ? "1" : "0" }, }; m_src = fmt::replace_all(m_src, syntax_replace); @@ -339,7 +339,8 @@ namespace gl set_parameters(cmd); const u32 num_bytes_per_invocation = (sizeof(_BlockType) * optimal_group_size); - const u32 linear_invocations = utils::aligned_div(data_length, num_bytes_per_invocation); + const u32 texels_per_dword = std::max(4u / sizeof(_BlockType), 1u); // For block sizes less than 4 bytes wide + const u32 linear_invocations = utils::aligned_div(data_length, num_bytes_per_invocation) / texels_per_dword; compute_task::run(cmd, linear_invocations); } }; diff --git a/rpcs3/Emu/RSX/GL/GLTexture.cpp b/rpcs3/Emu/RSX/GL/GLTexture.cpp index 0c34690bf4..3d4632b4e1 100644 --- a/rpcs3/Emu/RSX/GL/GLTexture.cpp +++ b/rpcs3/Emu/RSX/GL/GLTexture.cpp @@ -36,6 +36,16 @@ namespace gl { switch (block_size) { + case 1: + gl::get_compute_task>()->run( + cmd, dst, dst_offset, src, src_offset, + data_length, width, height, depth, 1); + break; + case 2: + gl::get_compute_task>()->run( + cmd, dst, 
dst_offset, src, src_offset, + data_length, width, height, depth, 1); + break; case 4: gl::get_compute_task>()->run( cmd, dst, dst_offset, src, src_offset, @@ -748,39 +758,54 @@ namespace gl g_upload_transfer_buffer.copy_to(&g_deswizzle_scratch_buffer.get(), upload_scratch_mem.second, deswizzle_data_offset, static_cast(image_linear_size)); // 2.2 Apply compute transform to deswizzle input and dump it in compute_scratch_mem - ensure(op.element_size == 2 || op.element_size == 4); const auto block_size = op.element_size * op.block_length; if (op.require_swap) { mem_layout.swap_bytes = false; - if (op.element_size == 4) [[ likely ]] + switch (op.element_size) { - do_deswizzle_transformation(cmd, block_size, + case 1: + do_deswizzle_transformation(cmd, block_size, &g_compute_decode_buffer.get(), compute_scratch_mem.second, &g_deswizzle_scratch_buffer.get(), deswizzle_data_offset, static_cast(image_linear_size), layout.width_in_texel, layout.height_in_texel, layout.depth); - } - else - { + break; + case 2: do_deswizzle_transformation(cmd, block_size, &g_compute_decode_buffer.get(), compute_scratch_mem.second, &g_deswizzle_scratch_buffer.get(), deswizzle_data_offset, static_cast(image_linear_size), layout.width_in_texel, layout.height_in_texel, layout.depth); + break; + case 4: + do_deswizzle_transformation(cmd, block_size, + &g_compute_decode_buffer.get(), compute_scratch_mem.second, &g_deswizzle_scratch_buffer.get(), deswizzle_data_offset, + static_cast(image_linear_size), layout.width_in_texel, layout.height_in_texel, layout.depth); + break; + default: + fmt::throw_exception("Unimplemented element size deswizzle"); } } else { - if (op.element_size == 4) [[ likely ]] + switch (op.element_size) { - do_deswizzle_transformation(cmd, block_size, + case 1: + do_deswizzle_transformation(cmd, block_size, &g_compute_decode_buffer.get(), compute_scratch_mem.second, &g_deswizzle_scratch_buffer.get(), deswizzle_data_offset, static_cast(image_linear_size), layout.width_in_texel, 
layout.height_in_texel, layout.depth); - } - else - { + break; + case 2: do_deswizzle_transformation(cmd, block_size, &g_compute_decode_buffer.get(), compute_scratch_mem.second, &g_deswizzle_scratch_buffer.get(), deswizzle_data_offset, static_cast(image_linear_size), layout.width_in_texel, layout.height_in_texel, layout.depth); + break; + case 4: + do_deswizzle_transformation(cmd, block_size, + &g_compute_decode_buffer.get(), compute_scratch_mem.second, &g_deswizzle_scratch_buffer.get(), deswizzle_data_offset, + static_cast(image_linear_size), layout.width_in_texel, layout.height_in_texel, layout.depth); + break; + default: + fmt::throw_exception("Unimplemented element size deswizzle"); } } diff --git a/rpcs3/Emu/RSX/VK/VKCompute.h b/rpcs3/Emu/RSX/VK/VKCompute.h index ec5e8d32a6..a62d93ec74 100644 --- a/rpcs3/Emu/RSX/VK/VKCompute.h +++ b/rpcs3/Emu/RSX/VK/VKCompute.h @@ -476,21 +476,8 @@ namespace vk params.logd = rsx::ceil_log2(depth); const u32 num_bytes_per_invocation = (sizeof(_BlockType) * optimal_group_size); - u32 linear_invocations = utils::aligned_div(data_length, num_bytes_per_invocation); - - // Check if we need to do subaddressing and adjust invocation count accordingly - switch (sizeof(_BlockType)) - { - case 1: - linear_invocations /= 4; - break; - case 2: - linear_invocations /= 2; - break; - default: - break; - } - + const u32 texels_per_dword = std::max(4u / sizeof(_BlockType), 1u); // For block sizes less than 4 bytes wide + const u32 linear_invocations = utils::aligned_div(data_length, num_bytes_per_invocation) / texels_per_dword; compute_task::run(cmd, linear_invocations); } }; From 3c3197d72dad43eba6e9c9d42b1ac2063ba619f6 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Thu, 20 Nov 2025 10:04:20 +0300 Subject: [PATCH 4/7] gl: Fix crashes when creating new context --- rpcs3/rpcs3qt/gl_gs_frame.cpp | 52 +++++++++++++++++++---------------- 1 file changed, 29 insertions(+), 23 deletions(-) diff --git a/rpcs3/rpcs3qt/gl_gs_frame.cpp 
b/rpcs3/rpcs3qt/gl_gs_frame.cpp index 94b6aa964f..10098f6cfd 100644 --- a/rpcs3/rpcs3qt/gl_gs_frame.cpp +++ b/rpcs3/rpcs3qt/gl_gs_frame.cpp @@ -35,37 +35,43 @@ void gl_gs_frame::reset() draw_context_t gl_gs_frame::make_context() { + // This whole function should run in the main GUI thread. + // This really matters on Windows where a lot of wgl internals are stashed in the TEB. + auto context = new GLContext(); context->handle = new QOpenGLContext(); + bool success = true; - if (m_primary_context) + Emu.BlockingCallFromMainThread([&]() { - QOffscreenSurface* surface = nullptr; - - // Workaround for the Qt warning: "Attempting to create QWindow-based QOffscreenSurface outside the gui thread. Expect failures." - Emu.BlockingCallFromMainThread([&]() + if (m_primary_context) { - surface = new QOffscreenSurface(); + QOffscreenSurface* surface = new QOffscreenSurface(); surface->setFormat(m_format); surface->create(); - }); - // Share resources with the first created context - context->handle->setShareContext(m_primary_context->handle); - context->surface = surface; - context->owner = true; - } - else - { - // This is the first created context, all others will share resources with this one - m_primary_context = context; - context->surface = this; - context->owner = false; - } + // Share resources with the first created context + context->handle->setShareContext(m_primary_context->handle); + context->surface = surface; + context->owner = true; + } + else + { + // This is the first created context, all others will share resources with this one + m_primary_context = context; + context->surface = this; + context->owner = false; + } - context->handle->setFormat(m_format); + context->handle->setFormat(m_format); - if (!context->handle->create()) + if (!context->handle->create()) + { + success = false; + } + }); + + if (!success) { fmt::throw_exception("Failed to create OpenGL context"); } @@ -110,8 +116,8 @@ void gl_gs_frame::delete_context(draw_context_t ctx) 
gl_ctx->handle->doneCurrent(); #ifdef _MSC_VER - //AMD driver crashes when executing wglDeleteContext - //Catch with SEH + // AMD driver crashes when executing wglDeleteContext, probably because the current thread does not own the context. + // Catch with SEH __try { delete gl_ctx->handle; From 400b4e71cfd33502a7ee0e25bb0c9939a1b18c5c Mon Sep 17 00:00:00 2001 From: kd-11 Date: Thu, 20 Nov 2025 10:11:50 +0300 Subject: [PATCH 5/7] gl: Lower the GPU decoding requirements since each level is decoded separately --- rpcs3/Emu/RSX/GL/GLTexture.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rpcs3/Emu/RSX/GL/GLTexture.cpp b/rpcs3/Emu/RSX/GL/GLTexture.cpp index 3d4632b4e1..6fd04020ec 100644 --- a/rpcs3/Emu/RSX/GL/GLTexture.cpp +++ b/rpcs3/Emu/RSX/GL/GLTexture.cpp @@ -717,7 +717,7 @@ namespace gl } rsx::io_buffer io_buf = dst_buffer; - caps.supports_hw_deswizzle = (is_swizzled && driver_caps.ARB_compute_shader_supported && image_linear_size > 4096); + caps.supports_hw_deswizzle = (is_swizzled && driver_caps.ARB_compute_shader_supported && image_linear_size > 1024); auto op = upload_texture_subresource(io_buf, layout, format, is_swizzled, caps); // Define upload region From e6d723c675921b9660d24490a6c327d30050f305 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Fri, 21 Nov 2025 01:23:58 +0300 Subject: [PATCH 6/7] rsx: Fix 16-bit format hardware deswizzle --- rpcs3/Emu/RSX/Program/GLSLSnippets/GPUDeswizzle.glsl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rpcs3/Emu/RSX/Program/GLSLSnippets/GPUDeswizzle.glsl b/rpcs3/Emu/RSX/Program/GLSLSnippets/GPUDeswizzle.glsl index 17d801c877..708f703983 100644 --- a/rpcs3/Emu/RSX/Program/GLSLSnippets/GPUDeswizzle.glsl +++ b/rpcs3/Emu/RSX/Program/GLSLSnippets/GPUDeswizzle.glsl @@ -110,7 +110,7 @@ void write16(inout uint accumulator, const in uint subword, const in uint src_id if (subword == 1) { - data_out[dst_id / 2] = accumulator; + data_out[dst_id / 2] = %f(accumulator); } } @@ -167,7 +167,7 @@ 
void main() #if USE_8BIT_ADDRESSING for (uint subword = 0, accumulator = 0; subword < 4; ++subword, ++x) { #elif USE_16BIT_ADDRESSING - for (uint subword = 0, temp = 0; subword < 2; ++subword, ++x) { + for (uint subword = 0, accumulator = 0; subword < 2; ++subword, ++x) { #endif uint src_texel_id = get_z_index(x, y, z); From 9deb6cd4fa373b5fd0883a077943b9a25c46541d Mon Sep 17 00:00:00 2001 From: FeTetra <166051662+FeTetra@users.noreply.github.com> Date: Fri, 21 Nov 2025 09:07:47 -0500 Subject: [PATCH 7/7] Fix ppu_register_function_at with unaligned parameters (#17718) This fixes some crashes in interpreter mode when calling functions like `sys_dbg_write_process_memory` to write data which may not be an instruction. --------- Co-authored-by: Elad <18193363+elad335@users.noreply.github.com> --- rpcs3/Emu/Cell/PPUThread.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/rpcs3/Emu/Cell/PPUThread.cpp b/rpcs3/Emu/Cell/PPUThread.cpp index f0b9b4d462..95536540f5 100644 --- a/rpcs3/Emu/Cell/PPUThread.cpp +++ b/rpcs3/Emu/Cell/PPUThread.cpp @@ -827,6 +827,9 @@ extern void ppu_register_function_at(u32 addr, u32 size, ppu_intrp_func_t ptr = return; } + size = utils::align(size + addr % 4, 4); + addr &= -4; + if (g_cfg.core.ppu_decoder == ppu_decoder_type::llvm) { return;