xenia/src/xenia/gpu/shaders/resolve_fast_32bpp_4xmsaa.xesli

86 lines
3.8 KiB
Plaintext
Raw Normal View History

/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2022 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include "endian.xesli"
#define XE_RESOLVE_SOURCE_TYPE xesl_uint4
#include "resolve.xesli"
// Destination buffer written by this shader, declared with xesl_uint4
// elements. The trailing arguments (set=1, binding=0, u0, space0) look like
// per-backend binding locations - NOTE(review): confirm against the
// definition of xesl_writeTypedStorageBuffer_declare.
xesl_writeTypedStorageBuffer_declare(xesl_uint4, xe_resolve_dest, set=1,
binding=0, u0, space0)
// Local size values of 8 x 8 x 1; presumably consumed by the xesl compute
// entry point macros as the thread group dimensions.
#define xesl_localSize_x 8
#define xesl_localSize_y 8
#define xesl_localSize_z 1
// Compute entry point: each thread loads 8 horizontally consecutive host
// pixels (one selected sample per pixel) from xe_resolve_edram, applies the
// optional first-pixel duplication, the red/blue swap helper and the
// destination endian swap, and stores the result to xe_resolve_dest as two
// xesl_uint4 values.
// Bindings, in order: push constants, the destination buffer, the EDRAM
// source. The only declared input is the global invocation ID.
xesl_entry_bindings_begin_compute
XE_RESOLVE_PUSH_CONSTANTS_BINDING
xesl_entry_binding_next
xesl_writeTypedStorageBuffer_binding(xesl_uint4, xe_resolve_dest,
buffer(1))
xesl_entry_binding_next
XE_RESOLVE_COPY_EDRAM_BINDING
xesl_entry_bindings_end_inputs_begin_compute
xesl_entry_input_globalInvocationID
xesl_entry_inputs_end_code_begin_compute
// 1 thread = 8 host pixels.
XeResolveInfo resolve_info =
XeResolveGetInfo(xesl_function_call_pushConstants);
// Only X is bounds-checked: the group height can't cross the resolve
// granularity, so a Y overflow check is not needed.
xesl_dont_flatten
if (xesl_GlobalInvocationID.x >= resolve_info.width_div_8_scaled) {
return;
}
// X is scaled by 8 (<< 3) because a thread covers 8 pixels; Y is used as is.
xesl_uint2 pixel_index = xesl_GlobalInvocationID.xy << xesl_uint2(3u, 0u);
// Source location in the EDRAM buffer. The max() raises the Y coordinate to
// at least uint(duplicate_second_host_pixel.y), so when that flag converts to
// 1, row 0 reads the same source row as row 1.
// The final >> 2u presumably converts an offset in 32-bit ints into an index
// of xesl_uint4 elements - NOTE(review): confirm against XeEdramOffsetInts.
uint source_address_int4s =
XeEdramOffsetInts(
xesl_uint2(pixel_index.x,
max(pixel_index.y,
uint(resolve_info.duplicate_second_host_pixel.y))) +
resolve_info.edram_offset_scaled,
resolve_info.edram_base_tiles, resolve_info.edram_pitch_tiles,
kXenosMsaaSamples_4X, resolve_info.edram_is_depth, 0u,
XeResolveFirstSampleIndex(resolve_info.sample_select),
resolve_info.resolution_scale)
>> 2u;
// Four consecutive xesl_uint4 loads supply the 8 pixels, two components from
// each load. Sample selections other than 2 and 3 take the .xz components;
// selections 2 and 3 take .yw.
// NOTE(review): presumably each xesl_uint4 holds two horizontally adjacent
// samples for each of two pixels in the 4x MSAA layout - confirm.
xesl_uint4 pixels_0123, pixels_4567;
xesl_dont_flatten
if (resolve_info.sample_select != kXenosCopySampleSelect_2 &&
resolve_info.sample_select != kXenosCopySampleSelect_3) {
pixels_0123.xy = xesl_typedStorageBufferLoad(
xe_resolve_edram, source_address_int4s).xz;
pixels_0123.zw = xesl_typedStorageBufferLoad(
xe_resolve_edram, source_address_int4s + 1u).xz;
pixels_4567.xy = xesl_typedStorageBufferLoad(
xe_resolve_edram, source_address_int4s + 2u).xz;
pixels_4567.zw = xesl_typedStorageBufferLoad(
xe_resolve_edram, source_address_int4s + 3u).xz;
} else {
pixels_0123.xy = xesl_typedStorageBufferLoad(
xe_resolve_edram, source_address_int4s).yw;
pixels_0123.zw = xesl_typedStorageBufferLoad(
xe_resolve_edram, source_address_int4s + 1u).yw;
pixels_4567.xy = xesl_typedStorageBufferLoad(
xe_resolve_edram, source_address_int4s + 2u).yw;
pixels_4567.zw = xesl_typedStorageBufferLoad(
xe_resolve_edram, source_address_int4s + 3u).yw;
}
// Horizontal counterpart of the Y clamp above: in the leftmost thread, the
// first pixel is overwritten with the second one when the flag is set.
if (resolve_info.duplicate_second_host_pixel.x && pixel_index.x == 0u) {
pixels_0123.x = pixels_0123.y;
}
// Presumably swaps the red and blue channels of all 8 pixels when
// resolve_info requests it - NOTE(review): confirm in resolve.xesli.
XeResolveSwap8PixelsRedBlue32bpp(resolve_info, pixels_0123, pixels_4567);
// Destination index for the first 4 pixels. The >> 4u presumably converts a
// byte address into an index of 16-byte xesl_uint4 elements, and the 2u
// argument is presumably log2 of the bytes per pixel - TODO confirm.
uint dest_address =
XeResolveDestPixelAddress(resolve_info, pixel_index, 2u) >> 4u;
xesl_writeTypedStorageBufferStore(
xe_resolve_dest, dest_address,
XeEndianSwap32(pixels_0123, resolve_info.dest_endian_128));
// The last 4 pixels go to a second location, offset from the first by the
// helper's result shifted right by 4 (the shift applies to the offset only,
// before the addition).
dest_address += XeResolveDestRightConsecutiveBlocksOffset(
pixel_index.x, 2u, resolve_info.resolution_scale) >> 4u;
xesl_writeTypedStorageBufferStore(
xe_resolve_dest, dest_address,
XeEndianSwap32(pixels_4567, resolve_info.dest_endian_128));
xesl_entry_code_end_compute