diff --git a/rpcs3/Emu/RSX/Common/TextureUtils.cpp b/rpcs3/Emu/RSX/Common/TextureUtils.cpp index 382ef9cc06..96f87111ff 100644 --- a/rpcs3/Emu/RSX/Common/TextureUtils.cpp +++ b/rpcs3/Emu/RSX/Common/TextureUtils.cpp @@ -1096,80 +1096,65 @@ namespace rsx fmt::throw_exception("Wrong format 0x%x", format); } - if (word_size) + if (!word_size) { - if (word_size == 1) + return result; + } + + result.element_size = word_size; + result.block_length = words_per_block; + + bool require_cpu_swizzle = !caps.supports_hw_deswizzle && is_swizzled; + bool require_cpu_byteswap = word_size > 1 && !caps.supports_byteswap; + + if (is_swizzled && caps.supports_hw_deswizzle) + { + result.require_deswizzle = true; + } + + if (!require_cpu_byteswap && !require_cpu_swizzle) + { + result.require_swap = (word_size > 1); + + if (caps.supports_zero_copy) { - if (is_swizzled) - { - copy_unmodified_block_swizzled::copy_mipmap_level(dst_buffer.as_span(), src_layout.data.as_span(), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block); - } - else if (caps.supports_zero_copy) - { - result.require_upload = true; - result.deferred_cmds = build_transfer_cmds(src_layout.data.data(), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block); - } - else - { - copy_unmodified_block::copy_mipmap_level(dst_buffer.as_span(), src_layout.data.as_span(), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block); - } + result.require_upload = true; + result.deferred_cmds = build_transfer_cmds(src_layout.data.data(), word_size * words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block); } + else if (word_size == 1) + { + copy_unmodified_block::copy_mipmap_level(dst_buffer.as_span(), src_layout.data.as_span(), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block); + } + else if (word_size == 2) + { + 
copy_unmodified_block::copy_mipmap_level(dst_buffer.as_span(), src_layout.data.as_span(), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block); + } + else if (word_size == 4) + { + copy_unmodified_block::copy_mipmap_level(dst_buffer.as_span(), src_layout.data.as_span(), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block); + } + + return result; + } + + if (word_size == 1) + { + ensure(is_swizzled); + copy_unmodified_block_swizzled::copy_mipmap_level(dst_buffer.as_span(), src_layout.data.as_span(), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block); + } + else if (word_size == 2) + { + if (is_swizzled) + copy_unmodified_block_swizzled::copy_mipmap_level(dst_buffer.as_span(), src_layout.data.as_span>(), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block); else - { - result.element_size = word_size; - result.block_length = words_per_block; - - bool require_cpu_swizzle = !caps.supports_hw_deswizzle && is_swizzled; - bool require_cpu_byteswap = !caps.supports_byteswap; - - if (is_swizzled && caps.supports_hw_deswizzle) - { - if (word_size == 4 || (((word_size * words_per_block) & 3) == 0)) - { - result.require_deswizzle = true; - } - else - { - require_cpu_swizzle = true; - } - } - - if (!require_cpu_byteswap && !require_cpu_swizzle) - { - result.require_swap = true; - - if (caps.supports_zero_copy) - { - result.require_upload = true; - result.deferred_cmds = build_transfer_cmds(src_layout.data.data(), word_size * words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block); - } - else if (word_size == 2) - { - copy_unmodified_block::copy_mipmap_level(dst_buffer.as_span(), src_layout.data.as_span(), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block); - } - else if (word_size == 4) - { - copy_unmodified_block::copy_mipmap_level(dst_buffer.as_span(), 
src_layout.data.as_span(), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block); - } - } - else - { - if (word_size == 2) - { - if (is_swizzled) - copy_unmodified_block_swizzled::copy_mipmap_level(dst_buffer.as_span(), src_layout.data.as_span>(), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block); - else - copy_unmodified_block::copy_mipmap_level(dst_buffer.as_span(), src_layout.data.as_span>(), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block); - } - else if (word_size == 4) - { - if (is_swizzled) - copy_unmodified_block_swizzled::copy_mipmap_level(dst_buffer.as_span(), src_layout.data.as_span>(), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block); - else - copy_unmodified_block::copy_mipmap_level(dst_buffer.as_span(), src_layout.data.as_span>(), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block); - } - } - } + copy_unmodified_block::copy_mipmap_level(dst_buffer.as_span(), src_layout.data.as_span>(), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block); + } + else if (word_size == 4) + { + if (is_swizzled) + copy_unmodified_block_swizzled::copy_mipmap_level(dst_buffer.as_span(), src_layout.data.as_span>(), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block); + else + copy_unmodified_block::copy_mipmap_level(dst_buffer.as_span(), src_layout.data.as_span>(), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block); } return result; diff --git a/rpcs3/Emu/RSX/Common/surface_store.h b/rpcs3/Emu/RSX/Common/surface_store.h index 5fa595a80b..4476930607 100644 --- a/rpcs3/Emu/RSX/Common/surface_store.h +++ b/rpcs3/Emu/RSX/Common/surface_store.h @@ -1219,7 +1219,7 @@ namespace rsx if (result.size() > 1) { - std::sort(result.begin(), result.end(), [](const auto &a, const auto &b) + result.sort([](const auto 
&a, const auto &b) { if (a.surface->last_use_tag == b.surface->last_use_tag) { diff --git a/rpcs3/Emu/RSX/Program/GLSLSnippets/GPUDeswizzle.glsl b/rpcs3/Emu/RSX/Program/GLSLSnippets/GPUDeswizzle.glsl index c2d679db6e..17d801c877 100644 --- a/rpcs3/Emu/RSX/Program/GLSLSnippets/GPUDeswizzle.glsl +++ b/rpcs3/Emu/RSX/Program/GLSLSnippets/GPUDeswizzle.glsl @@ -3,6 +3,9 @@ R"( #define SSBO_LOCATION(x) (x + %loc) +#define USE_8BIT_ADDRESSING %_8bit +#define USE_16BIT_ADDRESSING %_16bit + layout(local_size_x = %ws, local_size_y = 1, local_size_z = 1) in; layout(%set, binding=SSBO_LOCATION(0), std430) buffer ssbo0{ uint data_in[]; }; @@ -98,12 +101,57 @@ uint get_z_index(const in uint x_, const in uint y_, const in uint z_) return offset; } +#if USE_16BIT_ADDRESSING + +void write16(inout uint accumulator, const in uint subword, const in uint src_id, const in uint dst_id) +{ + const uint masks[] = { 0x0000FFFF, 0xFFFF0000 }; + accumulator |= data_in[src_id / 2] & masks[subword]; + + if (subword == 1) + { + data_out[dst_id / 2] = accumulator; + } +} + +#elif USE_8BIT_ADDRESSING + +void write8(inout uint accumulator, const in uint subword, const in uint src_id, const in uint dst_id) +{ + const uint masks[] = { 0x000000FF, 0x0000FF00, 0x00FF0000, 0xFF000000 }; + accumulator |= data_in[src_id / 4] & masks[subword]; + + if (subword == 3) + { + data_out[dst_id / 4] = accumulator; + } +} + +#else + +void write32(const in uint word_count, in uint src_id, in uint dst_id) +{ + for (uint i = 0; i < word_count; ++i) + { + uint value = data_in[src_id++]; + data_out[dst_id++] = %f(value); + } +} + +#endif + void main() { uint invocations_x = (gl_NumWorkGroups.x * gl_WorkGroupSize.x); uint texel_id = (gl_GlobalInvocationID.y * invocations_x) + gl_GlobalInvocationID.x; uint word_count = %_wordcount; +#if USE_8BIT_ADDRESSING + texel_id *= 4; // Each invocation consumes 4 texels +#elif USE_16BIT_ADDRESSING + texel_id *= 2; // Each invocation consumes 2 texels +#endif + if 
(!init_invocation_properties(texel_id)) return; @@ -116,14 +164,25 @@ void main() uint y = (slice_offset / row_length); uint x = (slice_offset % row_length); - uint src_texel_id = get_z_index(x, y, z); - uint dst_id = (texel_id * word_count); - uint src_id = (src_texel_id + invocation.data_offset) * word_count; +#if USE_8BIT_ADDRESSING + for (uint subword = 0, accumulator = 0; subword < 4; ++subword, ++x) { +#elif USE_16BIT_ADDRESSING + for (uint subword = 0, accumulator = 0; subword < 2; ++subword, ++x) { +#endif - for (uint i = 0; i < word_count; ++i) - { - uint value = data_in[src_id++]; - data_out[dst_id++] = %f(value); + uint src_texel_id = get_z_index(x, y, z); + uint dst_id = (texel_id * word_count); + uint src_id = (src_texel_id + invocation.data_offset) * word_count; + +#if USE_8BIT_ADDRESSING + write8(accumulator, subword, src_id, dst_id); } +#elif USE_16BIT_ADDRESSING + write16(accumulator, subword, src_id, dst_id); + } +#else + write32(word_count, src_id, dst_id); +#endif + } )" diff --git a/rpcs3/Emu/RSX/VK/VKCompute.h b/rpcs3/Emu/RSX/VK/VKCompute.h index 6e8f37772a..ec5e8d32a6 100644 --- a/rpcs3/Emu/RSX/VK/VKCompute.h +++ b/rpcs3/Emu/RSX/VK/VKCompute.h @@ -403,8 +403,6 @@ namespace vk cs_deswizzle_3d() { - ensure((sizeof(_BlockType) & 3) == 0); // "Unsupported block type" - ssbo_count = 2; use_push_constants = true; push_constants_size = 28; @@ -438,8 +436,10 @@ namespace vk { "%set", "set = 0" }, { "%push_block", "push_constant" }, { "%ws", std::to_string(optimal_group_size) }, - { "%_wordcount", std::to_string(sizeof(_BlockType) / 4) }, - { "%f", transform } + { "%_wordcount", std::to_string(std::max(sizeof(_BlockType) / 4u, 1u)) }, + { "%f", transform }, + { "%_8bit", sizeof(_BlockType) == 1 ? "1" : "0" }, + { "%_16bit", sizeof(_BlockType) == 2 ? 
"1" : "0" }, }; m_src = fmt::replace_all(m_src, syntax_replace); @@ -476,7 +476,21 @@ namespace vk params.logd = rsx::ceil_log2(depth); const u32 num_bytes_per_invocation = (sizeof(_BlockType) * optimal_group_size); - const u32 linear_invocations = utils::aligned_div(data_length, num_bytes_per_invocation); + u32 linear_invocations = utils::aligned_div(data_length, num_bytes_per_invocation); + + // Check if we need to do subaddressing and adjust invocation count accordingly + switch (sizeof(_BlockType)) + { + case 1: + linear_invocations = (linear_invocations + 3) / 4; + break; + case 2: + linear_invocations = (linear_invocations + 1) / 2; + break; + default: + break; + } + compute_task::run(cmd, linear_invocations); } }; diff --git a/rpcs3/Emu/RSX/VK/VKTexture.cpp b/rpcs3/Emu/RSX/VK/VKTexture.cpp index 413333d500..a57378384a 100644 --- a/rpcs3/Emu/RSX/VK/VKTexture.cpp +++ b/rpcs3/Emu/RSX/VK/VKTexture.cpp @@ -759,6 +759,10 @@ namespace vk { switch (block_size) { + case 1: + return vk::get_compute_task>(); + case 2: + return vk::get_compute_task>(); case 4: return vk::get_compute_task>(); case 8: @@ -776,21 +780,27 @@ namespace vk vk::cs_deswizzle_base* job = nullptr; const auto block_size = (word_size * word_count); - ensure(word_size == 4 || word_size == 2); - if (!swap_bytes) { - if (word_size == 4) - { - job = get_deswizzle_transformation(block_size); - } - else + switch (word_size) { + case 1: + job = get_deswizzle_transformation(block_size); + break; + case 2: job = get_deswizzle_transformation(block_size); + break; + case 4: + job = get_deswizzle_transformation(block_size); + break; + default: + fmt::throw_exception("Unimplemented deswizzle for format."); } } else { + ensure(word_size == 2 || word_size == 4); + if (word_size == 4) { job = get_deswizzle_transformation(block_size);