rpcsx/rpcsx-gpu2/lib/amdgpu-tiler/shaders/tilerLinear.comp.glsl
2024-09-25 16:00:55 +03:00

77 lines
3 KiB
GLSL

#version 460
#extension GL_GOOGLE_include_directive : enable
#extension GL_EXT_shader_explicit_arithmetic_types : enable
#extension GL_EXT_shader_atomic_int64 : enable
#extension GL_EXT_shader_atomic_float : enable
#extension GL_EXT_shader_image_load_formatted : enable
#extension GL_KHR_memory_scope_semantics : enable
#extension GL_EXT_shared_memory_block : enable
#extension GL_EXT_scalar_block_layout : enable
#extension GL_EXT_null_initializer : enable
#extension GL_EXT_buffer_reference2 : enable
#extension GL_EXT_buffer_reference_uvec2 : enable
#include "tiler.glsl"
// Entry point: each invocation copies a single element from the source
// buffer (addressed with the linear-element byte offset) to the destination
// buffer (addressed with the offset returned by computeLinearOffset).
//
// gl_GlobalInvocationID selects the element. When config.tiledSurfaceSize is
// non-zero, the z coordinate is consumed as a slice index: it is turned into
// per-slice byte offsets for both buffers and then reset to 0 before the
// in-slice offsets are computed.
//
// NOTE(review): config, the offset helpers and the buffer_reference_* types
// are not defined in this file (presumably they come from tiler.glsl); their
// exact types and semantics should be confirmed there.
void main() {
  uvec3 pos = gl_GlobalInvocationID;

  uint64_t tiledSliceOffset = 0;
  uint64_t linearSliceOffset = 0;

  if (config.tiledSurfaceSize != 0) {
    // Widen the slice index before multiplying so the product is evaluated
    // in 64 bits. Without this, a 32-bit surface-size field would make the
    // multiplication wrap before it is stored in the 64-bit offset.
    uint64_t slice = uint64_t(pos.z);
    tiledSliceOffset = slice * config.tiledSurfaceSize;
    linearSliceOffset = slice * config.linearSurfaceSize;
    pos.z = 0;
  }

  // The helper result is divided by 8 before being used as a byte offset
  // (presumably the helper returns an offset in bits - TODO confirm).
  uint64_t tiledByteOffset = computeLinearOffset(
    config.bitsPerElement,
    config.dataSize.y,
    config.dataSize.x,
    pos
  ) / 8;
  tiledByteOffset += tiledSliceOffset;

  uint64_t linearByteOffset = computeLinearElementByteOffset(
    pos,
    0,
    config.dataSize.x,
    config.dataSize.x * config.dataSize.y,
    config.bitsPerElement,
    1 << config.numFragments
  );
  linearByteOffset += linearSliceOffset;

  // Dispatch on the element size in bytes (bits rounded up to whole bytes).
  // 16- and 32-byte elements are moved as two / four consecutive 64-bit words.
  switch ((config.bitsPerElement + 7) / 8) {
  case 1:
    buffer_reference_uint8_t(config.dstAddress + tiledByteOffset).data = buffer_reference_uint8_t(config.srcAddress + linearByteOffset).data;
    break;

  case 2:
    buffer_reference_uint16_t(config.dstAddress + tiledByteOffset).data = buffer_reference_uint16_t(config.srcAddress + linearByteOffset).data;
    break;

  case 4:
    buffer_reference_uint32_t(config.dstAddress + tiledByteOffset).data = buffer_reference_uint32_t(config.srcAddress + linearByteOffset).data;
    break;

  case 8:
    buffer_reference_uint64_t(config.dstAddress + tiledByteOffset).data = buffer_reference_uint64_t(config.srcAddress + linearByteOffset).data;
    break;

  case 16:
    buffer_reference_uint64_t(config.dstAddress + tiledByteOffset).data = buffer_reference_uint64_t(config.srcAddress + linearByteOffset).data;
    buffer_reference_uint64_t(config.dstAddress + tiledByteOffset + 8).data = buffer_reference_uint64_t(config.srcAddress + linearByteOffset + 8).data;
    break;

  case 32:
    buffer_reference_uint64_t(config.dstAddress + tiledByteOffset).data = buffer_reference_uint64_t(config.srcAddress + linearByteOffset).data;
    buffer_reference_uint64_t(config.dstAddress + tiledByteOffset + 8).data = buffer_reference_uint64_t(config.srcAddress + linearByteOffset + 8).data;
    buffer_reference_uint64_t(config.dstAddress + tiledByteOffset + 16).data = buffer_reference_uint64_t(config.srcAddress + linearByteOffset + 16).data;
    buffer_reference_uint64_t(config.dstAddress + tiledByteOffset + 24).data = buffer_reference_uint64_t(config.srcAddress + linearByteOffset + 24).data;
    break;

  default:
    // Any other element size has no copy path here: nothing is written.
    break;
  }
}