mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-12-06 07:12:28 +01:00
rsx/vk: Add support for 8 and 16-bit texel GPU-accelerated deswizzle
This commit is contained in:
parent
b067688c8e
commit
ff72f944ba
|
|
@ -1096,53 +1096,35 @@ namespace rsx
|
||||||
fmt::throw_exception("Wrong format 0x%x", format);
|
fmt::throw_exception("Wrong format 0x%x", format);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (word_size)
|
if (!word_size)
|
||||||
{
|
{
|
||||||
if (word_size == 1)
|
return result;
|
||||||
{
|
|
||||||
if (is_swizzled)
|
|
||||||
{
|
|
||||||
copy_unmodified_block_swizzled::copy_mipmap_level(dst_buffer.as_span<u8>(), src_layout.data.as_span<const u8>(), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block);
|
|
||||||
}
|
}
|
||||||
else if (caps.supports_zero_copy)
|
|
||||||
{
|
|
||||||
result.require_upload = true;
|
|
||||||
result.deferred_cmds = build_transfer_cmds(src_layout.data.data(), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
copy_unmodified_block::copy_mipmap_level(dst_buffer.as_span<u8>(), src_layout.data.as_span<const u8>(), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
result.element_size = word_size;
|
result.element_size = word_size;
|
||||||
result.block_length = words_per_block;
|
result.block_length = words_per_block;
|
||||||
|
|
||||||
bool require_cpu_swizzle = !caps.supports_hw_deswizzle && is_swizzled;
|
bool require_cpu_swizzle = !caps.supports_hw_deswizzle && is_swizzled;
|
||||||
bool require_cpu_byteswap = !caps.supports_byteswap;
|
bool require_cpu_byteswap = word_size > 1 && !caps.supports_byteswap;
|
||||||
|
|
||||||
if (is_swizzled && caps.supports_hw_deswizzle)
|
if (is_swizzled && caps.supports_hw_deswizzle)
|
||||||
{
|
|
||||||
if (word_size == 4 || (((word_size * words_per_block) & 3) == 0))
|
|
||||||
{
|
{
|
||||||
result.require_deswizzle = true;
|
result.require_deswizzle = true;
|
||||||
}
|
}
|
||||||
else
|
|
||||||
{
|
|
||||||
require_cpu_swizzle = true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!require_cpu_byteswap && !require_cpu_swizzle)
|
if (!require_cpu_byteswap && !require_cpu_swizzle)
|
||||||
{
|
{
|
||||||
result.require_swap = true;
|
result.require_swap = (word_size > 1);
|
||||||
|
|
||||||
if (caps.supports_zero_copy)
|
if (caps.supports_zero_copy)
|
||||||
{
|
{
|
||||||
result.require_upload = true;
|
result.require_upload = true;
|
||||||
result.deferred_cmds = build_transfer_cmds(src_layout.data.data(), word_size * words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block);
|
result.deferred_cmds = build_transfer_cmds(src_layout.data.data(), word_size * words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block);
|
||||||
}
|
}
|
||||||
|
else if (word_size == 1)
|
||||||
|
{
|
||||||
|
copy_unmodified_block::copy_mipmap_level(dst_buffer.as_span<u8>(), src_layout.data.as_span<const u8>(), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block);
|
||||||
|
}
|
||||||
else if (word_size == 2)
|
else if (word_size == 2)
|
||||||
{
|
{
|
||||||
copy_unmodified_block::copy_mipmap_level(dst_buffer.as_span<u16>(), src_layout.data.as_span<const u16>(), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block);
|
copy_unmodified_block::copy_mipmap_level(dst_buffer.as_span<u16>(), src_layout.data.as_span<const u16>(), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block);
|
||||||
|
|
@ -1151,10 +1133,16 @@ namespace rsx
|
||||||
{
|
{
|
||||||
copy_unmodified_block::copy_mipmap_level(dst_buffer.as_span<u32>(), src_layout.data.as_span<const u32>(), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block);
|
copy_unmodified_block::copy_mipmap_level(dst_buffer.as_span<u32>(), src_layout.data.as_span<const u32>(), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return result;
|
||||||
}
|
}
|
||||||
else
|
|
||||||
|
if (word_size == 1)
|
||||||
{
|
{
|
||||||
if (word_size == 2)
|
ensure(is_swizzled);
|
||||||
|
copy_unmodified_block_swizzled::copy_mipmap_level(dst_buffer.as_span<u8>(), src_layout.data.as_span<const u8>(), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block);
|
||||||
|
}
|
||||||
|
else if (word_size == 2)
|
||||||
{
|
{
|
||||||
if (is_swizzled)
|
if (is_swizzled)
|
||||||
copy_unmodified_block_swizzled::copy_mipmap_level(dst_buffer.as_span<u16>(), src_layout.data.as_span<const be_t<u16>>(), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block);
|
copy_unmodified_block_swizzled::copy_mipmap_level(dst_buffer.as_span<u16>(), src_layout.data.as_span<const be_t<u16>>(), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block);
|
||||||
|
|
@ -1168,9 +1156,6 @@ namespace rsx
|
||||||
else
|
else
|
||||||
copy_unmodified_block::copy_mipmap_level(dst_buffer.as_span<u32>(), src_layout.data.as_span<const be_t<u32>>(), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block);
|
copy_unmodified_block::copy_mipmap_level(dst_buffer.as_span<u32>(), src_layout.data.as_span<const be_t<u32>>(), words_per_block, w, h, depth, src_layout.border, dst_pitch_in_block, src_layout.pitch_in_block);
|
||||||
}
|
}
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1219,7 +1219,7 @@ namespace rsx
|
||||||
|
|
||||||
if (result.size() > 1)
|
if (result.size() > 1)
|
||||||
{
|
{
|
||||||
std::sort(result.begin(), result.end(), [](const auto &a, const auto &b)
|
result.sort([](const auto &a, const auto &b)
|
||||||
{
|
{
|
||||||
if (a.surface->last_use_tag == b.surface->last_use_tag)
|
if (a.surface->last_use_tag == b.surface->last_use_tag)
|
||||||
{
|
{
|
||||||
|
|
|
||||||
|
|
@ -3,6 +3,9 @@ R"(
|
||||||
|
|
||||||
#define SSBO_LOCATION(x) (x + %loc)
|
#define SSBO_LOCATION(x) (x + %loc)
|
||||||
|
|
||||||
|
#define USE_8BIT_ADDRESSING %_8bit
|
||||||
|
#define USE_16BIT_ADDRESSING %_16bit
|
||||||
|
|
||||||
layout(local_size_x = %ws, local_size_y = 1, local_size_z = 1) in;
|
layout(local_size_x = %ws, local_size_y = 1, local_size_z = 1) in;
|
||||||
|
|
||||||
layout(%set, binding=SSBO_LOCATION(0), std430) buffer ssbo0{ uint data_in[]; };
|
layout(%set, binding=SSBO_LOCATION(0), std430) buffer ssbo0{ uint data_in[]; };
|
||||||
|
|
@ -98,12 +101,57 @@ uint get_z_index(const in uint x_, const in uint y_, const in uint z_)
|
||||||
return offset;
|
return offset;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if USE_16BIT_ADDRESSING
|
||||||
|
|
||||||
|
// Accumulates one 16-bit lane into a 32-bit word and stores the word once the
// upper lane has been added.
//   accumulator - running 32-bit value; `inout`, so the caller's variable is updated
//   subword     - lane selector: 0 uses mask 0x0000FFFF, 1 uses mask 0xFFFF0000
//   src_id      - source index in 16-bit units (halved to index the uint array data_in)
//   dst_id      - destination index in 16-bit units (halved to index data_out)
// No %f transform is applied to the value written by this function.
void write16(inout uint accumulator, const in uint subword, const in uint src_id, const in uint dst_id)
|
||||||
|
{
|
||||||
|
// One mask per 16-bit lane of a 32-bit word, indexed by subword.
const uint masks[] = { 0x0000FFFF, 0xFFFF0000 };
|
||||||
|
// OR in the masked bits of the source word; the bits keep their position within the word.
// NOTE(review): the mask is chosen by the destination lane, so the correct source texel is
// picked only if it occupies the same lane of its word (src_id % 2 == subword) - confirm
// against the addressing produced by the caller.
accumulator |= data_in[src_id / 2] & masks[subword];
|
||||||
|
|
||||||
|
// Store only after the second lane has been accumulated.
if (subword == 1)
|
||||||
|
{
|
||||||
|
data_out[dst_id / 2] = accumulator;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#elif USE_8BIT_ADDRESSING
|
||||||
|
|
||||||
|
// Accumulates one 8-bit lane into a 32-bit word and stores the word once the
// fourth lane has been added.
//   accumulator - running 32-bit value; `inout`, so the caller's variable is updated
//   subword     - lane selector in [0, 3], used as the index into the byte masks
//   src_id      - source index in 8-bit units (divided by 4 to index the uint array data_in)
//   dst_id      - destination index in 8-bit units (divided by 4 to index data_out)
// No %f transform is applied to the value written by this function.
void write8(inout uint accumulator, const in uint subword, const in uint src_id, const in uint dst_id)
|
||||||
|
{
|
||||||
|
// One mask per byte lane of a 32-bit word, indexed by subword.
const uint masks[] = { 0x000000FF, 0x0000FF00, 0x00FF0000, 0xFF000000 };
|
||||||
|
// OR in the masked bits of the source word; the bits keep their position within the word.
// NOTE(review): the mask is chosen by the destination lane, so the correct source texel is
// picked only if it occupies the same byte lane of its word (src_id % 4 == subword) -
// confirm against the addressing produced by the caller.
accumulator |= data_in[src_id / 4] & masks[subword];
|
||||||
|
|
||||||
|
// Store only after the last of the four lanes has been accumulated.
if (subword == 3)
|
||||||
|
{
|
||||||
|
data_out[dst_id / 4] = accumulator;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
// Copies word_count consecutive 32-bit words from data_in to data_out, passing
// each word through the %f placeholder on the way.
//   word_count - number of 32-bit words to copy
//   src_id     - index of the first word to read from data_in
//   dst_id     - index of the first word to write to data_out
// src_id and dst_id are `in` parameters, so the post-increments below advance
// only this function's local copies.
void write32(const in uint word_count, in uint src_id, in uint dst_id)
|
||||||
|
{
|
||||||
|
for (uint i = 0; i < word_count; ++i)
|
||||||
|
{
|
||||||
|
uint value = data_in[src_id++];
|
||||||
|
// %f is a text placeholder substituted into the shader source before compilation.
data_out[dst_id++] = %f(value);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
void main()
|
void main()
|
||||||
{
|
{
|
||||||
uint invocations_x = (gl_NumWorkGroups.x * gl_WorkGroupSize.x);
|
uint invocations_x = (gl_NumWorkGroups.x * gl_WorkGroupSize.x);
|
||||||
uint texel_id = (gl_GlobalInvocationID.y * invocations_x) + gl_GlobalInvocationID.x;
|
uint texel_id = (gl_GlobalInvocationID.y * invocations_x) + gl_GlobalInvocationID.x;
|
||||||
uint word_count = %_wordcount;
|
uint word_count = %_wordcount;
|
||||||
|
|
||||||
|
#if USE_8BIT_ADDRESSING
|
||||||
|
texel_id *= 4; // Each invocation consumes 4 texels
|
||||||
|
#elif USE_16BIT_ADDRESSING
|
||||||
|
texel_id *= 2; // Each invocation consumes 2 texels
|
||||||
|
#endif
|
||||||
|
|
||||||
if (!init_invocation_properties(texel_id))
|
if (!init_invocation_properties(texel_id))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
|
|
@ -116,14 +164,25 @@ void main()
|
||||||
uint y = (slice_offset / row_length);
|
uint y = (slice_offset / row_length);
|
||||||
uint x = (slice_offset % row_length);
|
uint x = (slice_offset % row_length);
|
||||||
|
|
||||||
|
#if USE_8BIT_ADDRESSING
|
||||||
|
for (uint subword = 0, accumulator = 0; subword < 4; ++subword, ++x) {
|
||||||
|
#elif USE_16BIT_ADDRESSING
|
||||||
|
for (uint subword = 0, temp = 0; subword < 2; ++subword, ++x) {
|
||||||
|
#endif
|
||||||
|
|
||||||
uint src_texel_id = get_z_index(x, y, z);
|
uint src_texel_id = get_z_index(x, y, z);
|
||||||
uint dst_id = (texel_id * word_count);
|
uint dst_id = (texel_id * word_count);
|
||||||
uint src_id = (src_texel_id + invocation.data_offset) * word_count;
|
uint src_id = (src_texel_id + invocation.data_offset) * word_count;
|
||||||
|
|
||||||
for (uint i = 0; i < word_count; ++i)
|
#if USE_8BIT_ADDRESSING
|
||||||
{
|
write8(accumulator, subword, src_id, dst_id);
|
||||||
uint value = data_in[src_id++];
|
|
||||||
data_out[dst_id++] = %f(value);
|
|
||||||
}
|
}
|
||||||
|
#elif USE_16BIT_ADDRESSING
|
||||||
|
write16(accumulator, subword, src_id, dst_id);
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
write32(word_count, src_id, dst_id);
|
||||||
|
#endif
|
||||||
|
|
||||||
}
|
}
|
||||||
)"
|
)"
|
||||||
|
|
|
||||||
|
|
@ -403,8 +403,6 @@ namespace vk
|
||||||
|
|
||||||
cs_deswizzle_3d()
|
cs_deswizzle_3d()
|
||||||
{
|
{
|
||||||
ensure((sizeof(_BlockType) & 3) == 0); // "Unsupported block type"
|
|
||||||
|
|
||||||
ssbo_count = 2;
|
ssbo_count = 2;
|
||||||
use_push_constants = true;
|
use_push_constants = true;
|
||||||
push_constants_size = 28;
|
push_constants_size = 28;
|
||||||
|
|
@ -438,8 +436,10 @@ namespace vk
|
||||||
{ "%set", "set = 0" },
|
{ "%set", "set = 0" },
|
||||||
{ "%push_block", "push_constant" },
|
{ "%push_block", "push_constant" },
|
||||||
{ "%ws", std::to_string(optimal_group_size) },
|
{ "%ws", std::to_string(optimal_group_size) },
|
||||||
{ "%_wordcount", std::to_string(sizeof(_BlockType) / 4) },
|
{ "%_wordcount", std::to_string(std::max<u32>(sizeof(_BlockType) / 4u, 1u)) },
|
||||||
{ "%f", transform }
|
{ "%f", transform },
|
||||||
|
{ "%_8bit", sizeof(_BlockType) == 1 ? "1" : "0" },
|
||||||
|
{ "%_16bit", sizeof(_BlockType) == 2 ? "1" : "0" },
|
||||||
};
|
};
|
||||||
|
|
||||||
m_src = fmt::replace_all(m_src, syntax_replace);
|
m_src = fmt::replace_all(m_src, syntax_replace);
|
||||||
|
|
@ -476,7 +476,21 @@ namespace vk
|
||||||
params.logd = rsx::ceil_log2(depth);
|
params.logd = rsx::ceil_log2(depth);
|
||||||
|
|
||||||
const u32 num_bytes_per_invocation = (sizeof(_BlockType) * optimal_group_size);
|
const u32 num_bytes_per_invocation = (sizeof(_BlockType) * optimal_group_size);
|
||||||
const u32 linear_invocations = utils::aligned_div(data_length, num_bytes_per_invocation);
|
u32 linear_invocations = utils::aligned_div(data_length, num_bytes_per_invocation);
|
||||||
|
|
||||||
|
// Check if we need to do subaddressing and adjust invocation count accordingly
|
||||||
|
switch (sizeof(_BlockType))
|
||||||
|
{
|
||||||
|
case 1:
|
||||||
|
linear_invocations /= 4;
|
||||||
|
break;
|
||||||
|
case 2:
|
||||||
|
linear_invocations /= 2;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
compute_task::run(cmd, linear_invocations);
|
compute_task::run(cmd, linear_invocations);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
|
||||||
|
|
@ -759,6 +759,10 @@ namespace vk
|
||||||
{
|
{
|
||||||
switch (block_size)
|
switch (block_size)
|
||||||
{
|
{
|
||||||
|
case 1:
|
||||||
|
return vk::get_compute_task<cs_deswizzle_3d<u8, u8, false>>();
|
||||||
|
case 2:
|
||||||
|
return vk::get_compute_task<cs_deswizzle_3d<u16, WordType, SwapBytes>>();
|
||||||
case 4:
|
case 4:
|
||||||
return vk::get_compute_task<cs_deswizzle_3d<u32, WordType, SwapBytes>>();
|
return vk::get_compute_task<cs_deswizzle_3d<u32, WordType, SwapBytes>>();
|
||||||
case 8:
|
case 8:
|
||||||
|
|
@ -776,21 +780,27 @@ namespace vk
|
||||||
vk::cs_deswizzle_base* job = nullptr;
|
vk::cs_deswizzle_base* job = nullptr;
|
||||||
const auto block_size = (word_size * word_count);
|
const auto block_size = (word_size * word_count);
|
||||||
|
|
||||||
ensure(word_size == 4 || word_size == 2);
|
|
||||||
|
|
||||||
if (!swap_bytes)
|
if (!swap_bytes)
|
||||||
{
|
{
|
||||||
if (word_size == 4)
|
switch (word_size)
|
||||||
{
|
|
||||||
job = get_deswizzle_transformation<u32, false>(block_size);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
{
|
||||||
|
case 1:
|
||||||
|
job = get_deswizzle_transformation<u8, false>(block_size);
|
||||||
|
break;
|
||||||
|
case 2:
|
||||||
job = get_deswizzle_transformation<u16, false>(block_size);
|
job = get_deswizzle_transformation<u16, false>(block_size);
|
||||||
|
break;
|
||||||
|
case 4:
|
||||||
|
job = get_deswizzle_transformation<u32, false>(block_size);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
fmt::throw_exception("Unimplemented deswizzle for format.");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
ensure(word_size == 2 || word_size == 4);
|
||||||
|
|
||||||
if (word_size == 4)
|
if (word_size == 4)
|
||||||
{
|
{
|
||||||
job = get_deswizzle_transformation<u32, true>(block_size);
|
job = get_deswizzle_transformation<u32, true>(block_size);
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue