mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-12-06 07:12:28 +01:00
rsx: Rework GPU deswizzle kernel to prevent hangs
Some checks are pending
Generate Translation Template / Generate Translation Template (push) Waiting to run
Build RPCS3 / RPCS3 Linux ${{ matrix.os }} ${{ matrix.compiler }} (/rpcs3/.ci/build-linux-aarch64.sh, gcc, rpcs3/rpcs3-ci-jammy-aarch64:1.7, ubuntu-24.04-arm) (push) Waiting to run
Build RPCS3 / RPCS3 Linux ${{ matrix.os }} ${{ matrix.compiler }} (/rpcs3/.ci/build-linux.sh, gcc, rpcs3/rpcs3-ci-jammy:1.7, ubuntu-24.04) (push) Waiting to run
Build RPCS3 / RPCS3 Linux ${{ matrix.os }} ${{ matrix.compiler }} (a1d35836e8d45bfc6f63c26f0a3e5d46ef622fe1, rpcs3/rpcs3-binaries-linux-arm64, /rpcs3/.ci/build-linux-aarch64.sh, clang, rpcs3/rpcs3-ci-jammy-aarch64:1.7, ubuntu-24.04-arm) (push) Waiting to run
Build RPCS3 / RPCS3 Linux ${{ matrix.os }} ${{ matrix.compiler }} (d812f1254a1157c80fd402f94446310560f54e5f, rpcs3/rpcs3-binaries-linux, /rpcs3/.ci/build-linux.sh, clang, rpcs3/rpcs3-ci-jammy:1.7, ubuntu-24.04) (push) Waiting to run
Build RPCS3 / RPCS3 Mac ${{ matrix.name }} (51ae32f468089a8169aaf1567de355ff4a3e0842, rpcs3/rpcs3-binaries-mac, .ci/build-mac.sh, Intel) (push) Waiting to run
Build RPCS3 / RPCS3 Mac ${{ matrix.name }} (8e21bdbc40711a3fccd18fbf17b742348b0f4281, rpcs3/rpcs3-binaries-mac-arm64, .ci/build-mac-arm64.sh, Apple Silicon) (push) Waiting to run
Build RPCS3 / RPCS3 Windows (push) Waiting to run
Build RPCS3 / RPCS3 Windows Clang (win64, clang, clang64) (push) Waiting to run
Build RPCS3 / RPCS3 FreeBSD (push) Waiting to run
Some checks are pending
Generate Translation Template / Generate Translation Template (push) Waiting to run
Build RPCS3 / RPCS3 Linux ${{ matrix.os }} ${{ matrix.compiler }} (/rpcs3/.ci/build-linux-aarch64.sh, gcc, rpcs3/rpcs3-ci-jammy-aarch64:1.7, ubuntu-24.04-arm) (push) Waiting to run
Build RPCS3 / RPCS3 Linux ${{ matrix.os }} ${{ matrix.compiler }} (/rpcs3/.ci/build-linux.sh, gcc, rpcs3/rpcs3-ci-jammy:1.7, ubuntu-24.04) (push) Waiting to run
Build RPCS3 / RPCS3 Linux ${{ matrix.os }} ${{ matrix.compiler }} (a1d35836e8d45bfc6f63c26f0a3e5d46ef622fe1, rpcs3/rpcs3-binaries-linux-arm64, /rpcs3/.ci/build-linux-aarch64.sh, clang, rpcs3/rpcs3-ci-jammy-aarch64:1.7, ubuntu-24.04-arm) (push) Waiting to run
Build RPCS3 / RPCS3 Linux ${{ matrix.os }} ${{ matrix.compiler }} (d812f1254a1157c80fd402f94446310560f54e5f, rpcs3/rpcs3-binaries-linux, /rpcs3/.ci/build-linux.sh, clang, rpcs3/rpcs3-ci-jammy:1.7, ubuntu-24.04) (push) Waiting to run
Build RPCS3 / RPCS3 Mac ${{ matrix.name }} (51ae32f468089a8169aaf1567de355ff4a3e0842, rpcs3/rpcs3-binaries-mac, .ci/build-mac.sh, Intel) (push) Waiting to run
Build RPCS3 / RPCS3 Mac ${{ matrix.name }} (8e21bdbc40711a3fccd18fbf17b742348b0f4281, rpcs3/rpcs3-binaries-mac-arm64, .ci/build-mac-arm64.sh, Apple Silicon) (push) Waiting to run
Build RPCS3 / RPCS3 Windows (push) Waiting to run
Build RPCS3 / RPCS3 Windows Clang (win64, clang, clang64) (push) Waiting to run
Build RPCS3 / RPCS3 FreeBSD (push) Waiting to run
This commit is contained in:
parent
9deb6cd4fa
commit
7f6842705c
|
|
@ -338,10 +338,10 @@ namespace gl
|
|||
params.logd = rsx::ceil_log2(depth);
|
||||
set_parameters(cmd);
|
||||
|
||||
const u32 num_bytes_per_invocation = (sizeof(_BlockType) * optimal_group_size);
|
||||
const u32 texels_per_dword = std::max<u32>(4u / sizeof(_BlockType), 1u); // For block sizes less than 4 bytes wide
|
||||
const u32 linear_invocations = utils::aligned_div(data_length, num_bytes_per_invocation) / texels_per_dword;
|
||||
compute_task::run(cmd, linear_invocations);
|
||||
const u32 word_count_per_invocation = std::max<u32>(sizeof(_BlockType) / 4u, 1u);
|
||||
const u32 num_bytes_per_invocation = (word_count_per_invocation * 4u * optimal_group_size);
|
||||
const u32 workgroup_invocations = utils::aligned_div(data_length, num_bytes_per_invocation);
|
||||
compute_task::run(cmd, workgroup_invocations);
|
||||
}
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -103,34 +103,48 @@ uint get_z_index(const in uint x_, const in uint y_, const in uint z_)
|
|||
|
||||
#if USE_16BIT_ADDRESSING
|
||||
|
||||
void write16(inout uint accumulator, const in uint subword, const in uint src_id, const in uint dst_id)
|
||||
void decode_16b(const in uint texel_id, in uint x, const in uint y, const in uint z)
|
||||
{
|
||||
const uint masks[] = { 0x0000FFFF, 0xFFFF0000 };
|
||||
accumulator |= data_in[src_id / 2] & masks[subword];
|
||||
uint accumulator = 0;
|
||||
|
||||
if (subword == 1)
|
||||
const uint subword_count = min(invocation.size.x, 2);
|
||||
for (uint subword = 0; subword < subword_count; ++subword, ++x)
|
||||
{
|
||||
data_out[dst_id / 2] = %f(accumulator);
|
||||
uint src_texel_id = get_z_index(x, y, z);
|
||||
uint src_id = (src_texel_id + invocation.data_offset);
|
||||
accumulator |= data_in[src_id / 2] & masks[subword];
|
||||
}
|
||||
|
||||
data_out[texel_id / 2] = %f(accumulator);
|
||||
}
|
||||
|
||||
#elif USE_8BIT_ADDRESSING
|
||||
|
||||
void write8(inout uint accumulator, const in uint subword, const in uint src_id, const in uint dst_id)
|
||||
void decode_8b(const in uint texel_id, in uint x, const in uint y, const in uint z)
|
||||
{
|
||||
const uint masks[] = { 0x000000FF, 0x0000FF00, 0x00FF0000, 0xFF000000 };
|
||||
accumulator |= data_in[src_id / 4] & masks[subword];
|
||||
uint accumulator = 0;
|
||||
|
||||
if (subword == 3)
|
||||
const uint subword_count = min(invocation.size.x, 4);
|
||||
for (uint subword = 0; subword < subword_count; ++subword, ++x)
|
||||
{
|
||||
data_out[dst_id / 4] = accumulator;
|
||||
uint src_texel_id = get_z_index(x, y, z);
|
||||
uint src_id = (src_texel_id + invocation.data_offset);
|
||||
accumulator |= data_in[src_id / 4] & masks[subword];
|
||||
}
|
||||
|
||||
data_out[texel_id / 4] = accumulator;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
void write32(const in uint word_count, in uint src_id, in uint dst_id)
|
||||
void decode_32b(const in uint texel_id, const in uint word_count, const in uint x, const in uint y, const in uint z)
|
||||
{
|
||||
uint src_texel_id = get_z_index(x, y, z);
|
||||
uint dst_id = (texel_id * word_count);
|
||||
uint src_id = (src_texel_id + invocation.data_offset) * word_count;
|
||||
|
||||
for (uint i = 0; i < word_count; ++i)
|
||||
{
|
||||
uint value = data_in[src_id++];
|
||||
|
|
@ -165,23 +179,11 @@ void main()
|
|||
uint x = (slice_offset % row_length);
|
||||
|
||||
#if USE_8BIT_ADDRESSING
|
||||
for (uint subword = 0, accumulator = 0; subword < 4; ++subword, ++x) {
|
||||
decode_8b(texel_id, x, y, z);
|
||||
#elif USE_16BIT_ADDRESSING
|
||||
for (uint subword = 0, accumulator = 0; subword < 2; ++subword, ++x) {
|
||||
#endif
|
||||
|
||||
uint src_texel_id = get_z_index(x, y, z);
|
||||
uint dst_id = (texel_id * word_count);
|
||||
uint src_id = (src_texel_id + invocation.data_offset) * word_count;
|
||||
|
||||
#if USE_8BIT_ADDRESSING
|
||||
write8(accumulator, subword, src_id, dst_id);
|
||||
}
|
||||
#elif USE_16BIT_ADDRESSING
|
||||
write16(accumulator, subword, src_id, dst_id);
|
||||
}
|
||||
decode_16b(texel_id, x, y, z);
|
||||
#else
|
||||
write32(word_count, src_id, dst_id);
|
||||
decode_32b(texel_id, word_count, x, y, z);
|
||||
#endif
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -475,10 +475,10 @@ namespace vk
|
|||
params.logh = rsx::ceil_log2(height);
|
||||
params.logd = rsx::ceil_log2(depth);
|
||||
|
||||
const u32 num_bytes_per_invocation = (sizeof(_BlockType) * optimal_group_size);
|
||||
const u32 texels_per_dword = std::max<u32>(4u / sizeof(_BlockType), 1u); // For block sizes less than 4 bytes wide
|
||||
const u32 linear_invocations = utils::aligned_div(data_length, num_bytes_per_invocation) / texels_per_dword;
|
||||
compute_task::run(cmd, linear_invocations);
|
||||
const u32 word_count_per_invocation = std::max<u32>(sizeof(_BlockType) / 4u, 1u);
|
||||
const u32 num_bytes_per_invocation = (word_count_per_invocation * 4u * optimal_group_size);
|
||||
const u32 workgroup_invocations = utils::aligned_div(data_length, num_bytes_per_invocation);
|
||||
compute_task::run(cmd, workgroup_invocations);
|
||||
}
|
||||
};
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue