xenia/src/xenia/gpu/shaders/resolve_fast_32bpp_4xmsaa.xesli
Triang3l b61953374e [GPU] Make resolve EDRAM binding DS 0 and rename it
Ordering the descriptor sets by the change frequency on Vulkan, in increasing order (the opposite of D3D12 root signatures). The EDRAM binding never changes there (always one storage buffer), while the destination buffer binding may become changeable in the future (to split dispatches if exceeding `maxStorageBufferRange`, for example).
2022-06-20 12:15:52 +03:00

86 lines
3.8 KiB
Plaintext

/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2022 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include "endian.xesli"
#define XE_RESOLVE_SOURCE_TYPE xesl_uint4
#include "resolve.xesli"
// Destination buffer this shader stores into, declared as a writable typed
// storage buffer of xesl_uint4 elements. The trailing arguments (set=1,
// binding=0, u0, space0) are the binding slots handed to the declaration
// macro; the destination uses set 1 here.
xesl_writeTypedStorageBuffer_declare(xesl_uint4, xe_resolve_dest, set=1,
binding=0, u0, space0)
// Local size constants (8 x 8 x 1). NOTE(review): presumably consumed by the
// compute entry macros that follow as the thread group size - confirm in the
// XeSL headers.
#define xesl_localSize_x 8
#define xesl_localSize_y 8
#define xesl_localSize_z 1
// Compute entry point: copies one sample of each pixel from the
// xe_resolve_edram buffer to xe_resolve_dest, 8 pixels per invocation, written
// as two xesl_uint4 stores.
xesl_entry_bindings_begin_compute
  XE_RESOLVE_PUSH_CONSTANTS_BINDING
  xesl_entry_binding_next
  xesl_writeTypedStorageBuffer_binding(xesl_uint4, xe_resolve_dest, buffer(1))
  xesl_entry_binding_next
  XE_RESOLVE_COPY_EDRAM_BINDING
xesl_entry_bindings_end_inputs_begin_compute
  xesl_entry_input_globalInvocationID
xesl_entry_inputs_end_code_begin_compute
  // Every invocation handles a horizontal run of 8 host pixels.
  XeResolveInfo resolve_params =
      XeResolveGetInfo(xesl_function_call_pushConstants);

  // Only X needs a bounds check: a group's height can't cross the resolve
  // granularity, so Y never overflows.
  xesl_dont_flatten
  if (xesl_GlobalInvocationID.x >= resolve_params.width_div_8_scaled) {
    return;
  }

  // X is scaled by 8 (one invocation = 8 pixels), Y is used as is.
  xesl_uint2 first_pixel = xesl_uint2(xesl_GlobalInvocationID.x << 3u,
                                      xesl_GlobalInvocationID.y);

  // Row to read from: raised to at least 1 when duplicate_second_host_pixel.y
  // is set, so that row 0 reads the data of row 1.
  uint source_row =
      max(first_pixel.y, uint(resolve_params.duplicate_second_host_pixel.y));
  xesl_uint2 source_pixel = xesl_uint2(first_pixel.x, source_row) +
                            resolve_params.edram_offset_scaled;

  // Offset returned by XeEdramOffsetInts, divided by 4 to index xesl_uint4
  // elements of the source buffer.
  uint source_int4_index =
      XeEdramOffsetInts(
          source_pixel, resolve_params.edram_base_tiles,
          resolve_params.edram_pitch_tiles, kXenosMsaaSamples_4X,
          resolve_params.edram_is_depth, 0u,
          XeResolveFirstSampleIndex(resolve_params.sample_select),
          resolve_params.resolution_scale) >>
      2u;

  // Four consecutive xesl_uint4 loads supply the 8 pixels, two components from
  // each: the odd components (.yw) for sample selections 2 and 3, the even
  // components (.xz) for every other selection.
  xesl_uint4 left_pixels, right_pixels;
  xesl_dont_flatten
  if (resolve_params.sample_select == kXenosCopySampleSelect_2 ||
      resolve_params.sample_select == kXenosCopySampleSelect_3) {
    left_pixels = xesl_uint4(
        xesl_typedStorageBufferLoad(xe_resolve_edram, source_int4_index).yw,
        xesl_typedStorageBufferLoad(xe_resolve_edram,
                                    source_int4_index + 1u).yw);
    right_pixels = xesl_uint4(
        xesl_typedStorageBufferLoad(xe_resolve_edram,
                                    source_int4_index + 2u).yw,
        xesl_typedStorageBufferLoad(xe_resolve_edram,
                                    source_int4_index + 3u).yw);
  } else {
    left_pixels = xesl_uint4(
        xesl_typedStorageBufferLoad(xe_resolve_edram, source_int4_index).xz,
        xesl_typedStorageBufferLoad(xe_resolve_edram,
                                    source_int4_index + 1u).xz);
    right_pixels = xesl_uint4(
        xesl_typedStorageBufferLoad(xe_resolve_edram,
                                    source_int4_index + 2u).xz,
        xesl_typedStorageBufferLoad(xe_resolve_edram,
                                    source_int4_index + 3u).xz);
  }

  // In the leftmost column, optionally replace the first pixel with the
  // second one.
  if (first_pixel.x == 0u && resolve_params.duplicate_second_host_pixel.x) {
    left_pixels.x = left_pixels.y;
  }

  XeResolveSwap8PixelsRedBlue32bpp(resolve_params, left_pixels, right_pixels);

  // Destination addresses are shifted right by 4 to index xesl_uint4 elements.
  // NOTE(review): assumes the helpers return byte addresses - confirm in
  // resolve.xesli.
  uint dest_left_index =
      XeResolveDestPixelAddress(resolve_params, first_pixel, 2u) >> 4u;
  uint dest_right_index =
      dest_left_index +
      (XeResolveDestRightConsecutiveBlocksOffset(
           first_pixel.x, 2u, resolve_params.resolution_scale) >>
       4u);

  xesl_writeTypedStorageBufferStore(
      xe_resolve_dest, dest_left_index,
      XeEndianSwap32(left_pixels, resolve_params.dest_endian_128));
  xesl_writeTypedStorageBufferStore(
      xe_resolve_dest, dest_right_index,
      XeEndianSwap32(right_pixels, resolve_params.dest_endian_128));
xesl_entry_code_end_compute