2020-12-19 12:28:10 +01:00
# include "stdafx.h"
# include "GLSLCommon.h"
2022-12-05 20:28:43 +01:00
# include "RSXFragmentProgram.h"
2020-12-19 12:28:10 +01:00
2023-04-18 12:19:57 +02:00
# include "Emu/system_config.h"
# include "Emu/RSX/gcm_enums.h"
# include "Utilities/StrFmt.h"
2023-01-09 12:45:07 +01:00
2020-12-19 12:28:10 +01:00
namespace program_common
{
2023-01-09 12:45:07 +01:00
/// Writes one preprocessor definition per (name, value) entry of `enums` to OS,
/// followed by a separator line. Every value is converted to int before it is
/// printed, so scoped enum values can be passed directly.
template <typename T>
void define_glsl_constants(std::ostream& OS, std::initializer_list<std::pair<const char*, T>> enums)
{
	for (const auto& [name, value] : enums)
	{
		// Print the numeric value, never the enum's own stream representation
		const int numeric_value = static_cast<int>(value);
		OS << " #define " << name << " " << numeric_value << " \n ";
	}

	// Separator after the group of definitions
	OS << " \n ";
}
2022-04-24 10:56:42 +02:00
/// Emits the scalar GLSL helper `comparison_passes(float a, float b, uint func)`.
/// The generated function switches on `func`: 0 never, 1 less, 2 equal, 3 lequal,
/// 4 greater, 5 nequal, 6 gequal, 7 always. Any other value shares the `never` branch.
/// Both operands are wrapped in CMP_FIXUP, which the emitted text does not define itself.
void insert_compare_op(std::ostream& OS)
{
	const char* const scalar_compare_source =
		" bool comparison_passes(const in float a, const in float b, const in uint func) \n "
		" { \n "
		" switch (func) \n "
		" { \n "
		" default: \n "
		" case 0: return false; //never \n "
		" case 1: return (CMP_FIXUP(a) < CMP_FIXUP(b)); //less \n "
		" case 2: return (CMP_FIXUP(a) == CMP_FIXUP(b)); //equal \n "
		" case 3: return (CMP_FIXUP(a) <= CMP_FIXUP(b)); //lequal \n "
		" case 4: return (CMP_FIXUP(a) > CMP_FIXUP(b)); //greater \n "
		" case 5: return (CMP_FIXUP(a) != CMP_FIXUP(b)); //nequal \n "
		" case 6: return (CMP_FIXUP(a) >= CMP_FIXUP(b)); //gequal \n "
		" case 7: return true; //always \n "
		" } \n "
		" } \n \n ";

	OS << scalar_compare_source;
}
/// Emits the vector GLSL helper `comparison_passes(vec4 a, vec4 b, uint func)`,
/// which returns a bvec4. The `func` values match the scalar variant
/// (0 never ... 7 always); any other value shares the `never` branch.
/// Both operands are wrapped in CMP_FIXUP, which the emitted text does not define itself.
void insert_compare_op_vector(std::ostream& OS)
{
	const char* const vector_compare_source =
		" bvec4 comparison_passes(const in vec4 a, const in vec4 b, const in uint func) \n "
		" { \n "
		" switch (func) \n "
		" { \n "
		" default: \n "
		" case 0: return bvec4(false); //never \n "
		" case 1: return lessThan(CMP_FIXUP(a), CMP_FIXUP(b)); //less \n "
		" case 2: return equal(CMP_FIXUP(a), CMP_FIXUP(b)); //equal \n "
		" case 3: return lessThanEqual(CMP_FIXUP(a), CMP_FIXUP(b)); //lequal \n "
		" case 4: return greaterThan(CMP_FIXUP(a), CMP_FIXUP(b)); //greater \n "
		" case 5: return notEqual(CMP_FIXUP(a), CMP_FIXUP(b)); //nequal \n "
		" case 6: return greaterThanEqual(CMP_FIXUP(a), CMP_FIXUP(b)); //gequal \n "
		" case 7: return bvec4(true); //always \n "
		" } \n "
		" } \n \n ";

	OS << vector_compare_source;
}
2023-01-09 12:45:07 +01:00
void insert_fog_declaration ( std : : ostream & OS , std : : string_view wide_vector_type , std : : string_view input_coord )
2020-12-19 12:28:10 +01:00
{
2023-01-09 12:45:07 +01:00
define_glsl_constants < rsx : : fog_mode > ( OS ,
{
{ " FOG_LINEAR " , rsx : : fog_mode : : linear } ,
{ " FOG_EXP " , rsx : : fog_mode : : exponential } ,
{ " FOG_EXP2 " , rsx : : fog_mode : : exponential2 } ,
{ " FOG_LINEAR_ABS " , rsx : : fog_mode : : linear_abs } ,
{ " FOG_EXP_ABS " , rsx : : fog_mode : : exponential_abs } ,
{ " FOG_EXP2_ABS " , rsx : : fog_mode : : exponential2_abs }
} ) ;
2020-12-19 12:28:10 +01:00
2023-01-09 12:45:07 +01:00
std : : string template_body = " $T fetch_fog_value(const in uint mode) \n " ;
2020-12-19 12:28:10 +01:00
template_body + =
" { \n "
" $T result = $T($I.x, 0., 0., 0.); \n "
" switch(mode) \n "
" { \n "
" default: \n "
" return result; \n "
2023-01-09 12:45:07 +01:00
" case FOG_LINEAR: \n "
2020-12-19 12:28:10 +01:00
" //linear \n "
" result.y = fog_param1 * $I.x + (fog_param0 - 1.); \n "
" break; \n "
2023-01-09 12:45:07 +01:00
" case FOG_EXP: \n "
2020-12-19 12:28:10 +01:00
" //exponential \n "
" result.y = exp(11.084 * (fog_param1 * $I.x + fog_param0 - 1.5)); \n "
" break; \n "
2023-01-09 12:45:07 +01:00
" case FOG_EXP2: \n "
2020-12-19 12:28:10 +01:00
" //exponential2 \n "
" result.y = exp(-pow(4.709 * (fog_param1 * $I.x + fog_param0 - 1.5), 2.)); \n "
" break; \n "
2023-01-09 12:45:07 +01:00
" case FOG_EXP_ABS: \n "
2020-12-19 12:28:10 +01:00
" //exponential_abs \n "
" result.y = exp(11.084 * (fog_param1 * abs($I.x) + fog_param0 - 1.5)); \n "
" break; \n "
2023-01-09 12:45:07 +01:00
" case FOG_EXP2_ABS: \n "
2020-12-19 12:28:10 +01:00
" //exponential2_abs \n "
" result.y = exp(-pow(4.709 * (fog_param1 * abs($I.x) + fog_param0 - 1.5), 2.)); \n "
" break; \n "
2023-01-09 12:45:07 +01:00
" case FOG_LINEAR_ABS: \n "
2020-12-19 12:28:10 +01:00
" //linear_abs \n "
" result.y = fog_param1 * abs($I.x) + (fog_param0 - 1.); \n "
" break; \n "
" } \n "
" \n "
" result.y = clamp(result.y, 0., 1.); \n "
" return result; \n "
" } \n \n " ;
2021-11-28 08:30:41 +01:00
std : : pair < std : : string_view , std : : string > replacements [ ] =
2023-01-09 12:45:07 +01:00
{
std : : make_pair ( " $T " , std : : string ( wide_vector_type ) ) ,
std : : make_pair ( " $I " , std : : string ( input_coord ) )
} ;
2020-12-19 12:28:10 +01:00
OS < < fmt : : replace_all ( template_body , replacements ) ;
}
}
namespace glsl
{
std : : string getFloatTypeNameImpl ( usz elementCount )
{
switch ( elementCount )
{
default :
abort ( ) ;
case 1 :
return " float " ;
case 2 :
return " vec2 " ;
case 3 :
return " vec3 " ;
case 4 :
return " vec4 " ;
}
}
std : : string getHalfTypeNameImpl ( usz elementCount )
{
switch ( elementCount )
{
default :
abort ( ) ;
case 1 :
return " float16_t " ;
case 2 :
return " f16vec2 " ;
case 3 :
return " f16vec3 " ;
case 4 :
return " f16vec4 " ;
}
}
std : : string compareFunctionImpl ( COMPARE f , const std : : string & Op0 , const std : : string & Op1 , bool scalar )
{
if ( scalar )
{
switch ( f )
{
2021-10-05 19:33:58 +02:00
case COMPARE : : SEQ :
2022-04-24 10:56:42 +02:00
return fmt : : format ( " CMP_FIXUP(%s) == CMP_FIXUP(%s) " , Op0 , Op1 ) ;
2021-10-05 19:33:58 +02:00
case COMPARE : : SGE :
2022-04-24 10:56:42 +02:00
return fmt : : format ( " CMP_FIXUP(%s) >= CMP_FIXUP(%s) " , Op0 , Op1 ) ;
2021-10-05 19:33:58 +02:00
case COMPARE : : SGT :
2022-04-24 10:56:42 +02:00
return fmt : : format ( " CMP_FIXUP(%s) > CMP_FIXUP(%s) " , Op0 , Op1 ) ;
2021-10-05 19:33:58 +02:00
case COMPARE : : SLE :
2022-04-24 10:56:42 +02:00
return fmt : : format ( " CMP_FIXUP(%s) <= CMP_FIXUP(%s) " , Op0 , Op1 ) ;
2021-10-05 19:33:58 +02:00
case COMPARE : : SLT :
2022-04-24 12:03:22 +02:00
return fmt : : format ( " CMP_FIXUP(%s) < CMP_FIXUP(%s) " , Op0 , Op1 ) ;
2021-10-05 19:33:58 +02:00
case COMPARE : : SNE :
2022-04-24 12:03:22 +02:00
return fmt : : format ( " CMP_FIXUP(%s) != CMP_FIXUP(%s) " , Op0 , Op1 ) ;
2020-12-19 12:28:10 +01:00
}
}
else
{
switch ( f )
{
2021-10-05 19:33:58 +02:00
case COMPARE : : SEQ :
2022-04-24 10:56:42 +02:00
return fmt : : format ( " equal(CMP_FIXUP(%s), CMP_FIXUP(%s)) " , Op0 , Op1 ) ;
2021-10-05 19:33:58 +02:00
case COMPARE : : SGE :
2022-04-24 10:56:42 +02:00
return fmt : : format ( " greaterThanEqual(CMP_FIXUP(%s), CMP_FIXUP(%s)) " , Op0 , Op1 ) ;
2021-10-05 19:33:58 +02:00
case COMPARE : : SGT :
2022-04-24 10:56:42 +02:00
return fmt : : format ( " greaterThan(CMP_FIXUP(%s), CMP_FIXUP(%s)) " , Op0 , Op1 ) ;
2021-10-05 19:33:58 +02:00
case COMPARE : : SLE :
2022-04-24 10:56:42 +02:00
return fmt : : format ( " lessThanEqual(CMP_FIXUP(%s), CMP_FIXUP(%s)) " , Op0 , Op1 ) ;
2021-10-05 19:33:58 +02:00
case COMPARE : : SLT :
2022-04-24 10:56:42 +02:00
return fmt : : format ( " lessThan(CMP_FIXUP(%s), CMP_FIXUP(%s)) " , Op0 , Op1 ) ;
2021-10-05 19:33:58 +02:00
case COMPARE : : SNE :
2022-04-24 10:56:42 +02:00
return fmt : : format ( " notEqual(CMP_FIXUP(%s), CMP_FIXUP(%s)) " , Op0 , Op1 ) ;
2020-12-19 12:28:10 +01:00
}
}
fmt : : throw_exception ( " Unknown compare function " ) ;
}
void insert_vertex_input_fetch ( std : : stringstream & OS , glsl_rules rules , bool glsl4_compliant )
{
std : : string vertex_id_name = ( rules ! = glsl_rules_spirv ) ? " gl_VertexID " : " gl_VertexIndex " ;
2023-01-09 12:45:07 +01:00
// Actually decode a vertex attribute from a raw byte stream
program_common : : define_glsl_constants < int > ( OS ,
{
{ " VTX_FMT_SNORM16 " , RSX_VERTEX_BASE_TYPE_SNORM16 } ,
{ " VTX_FMT_FLOAT32 " , RSX_VERTEX_BASE_TYPE_FLOAT } ,
{ " VTX_FMT_FLOAT16 " , RSX_VERTEX_BASE_TYPE_HALF_FLOAT } ,
{ " VTX_FMT_UNORM8 " , RSX_VERTEX_BASE_TYPE_UNORM8 } ,
{ " VTX_FMT_SINT16 " , RSX_VERTEX_BASE_TYPE_SINT16 } ,
{ " VTX_FMT_COMP32 " , RSX_VERTEX_BASE_TYPE_CMP32 } ,
{ " VTX_FMT_UINT8 " , RSX_VERTEX_BASE_TYPE_UINT8 }
} ) ;
2020-12-19 12:28:10 +01:00
// For intel GPUs which cannot access vectors in indexed mode (driver bug? or glsl version too low?)
2021-05-09 20:36:21 +02:00
// Note: Tested on Mesa iris with HD 530 and compilant path works fine, may be a bug on Windows proprietary drivers
2020-12-19 12:28:10 +01:00
if ( ! glsl4_compliant )
{
OS < <
2021-05-09 20:32:16 +02:00
" void mov(inout uvec4 vector, const in int index, const in uint scalar) \n "
2020-12-19 12:28:10 +01:00
" { \n "
" switch(index) \n "
" { \n "
" case 0: vector.x = scalar; return; \n "
" case 1: vector.y = scalar; return; \n "
" case 2: vector.z = scalar; return; \n "
" case 3: vector.w = scalar; return; \n "
" } \n "
" } \n \n "
" uint ref(const in uvec4 vector, const in int index) \n "
" { \n "
" switch(index) \n "
" { \n "
" case 0: return vector.x; \n "
" case 1: return vector.y; \n "
" case 2: return vector.z; \n "
" case 3: return vector.w; \n "
" } \n "
" } \n \n " ;
}
else
{
OS < <
" #define mov(v, i, s) v[i] = s \n "
" #define ref(v, i) v[i] \n \n " ;
}
OS < <
" struct attribute_desc \n "
" { \n "
" uint type; \n "
" uint attribute_size; \n "
" uint starting_offset; \n "
" uint stride; \n "
" uint frequency; \n "
" bool swap_bytes; \n "
" bool is_volatile; \n "
" bool modulo; \n "
" }; \n \n "
" uint gen_bits(const in uint x, const in uint y, const in uint z, const in uint w, const in bool swap) \n "
" { \n "
" return (swap) ? \n "
" _set_bits(_set_bits(_set_bits(w, z, 8, 8), y, 16, 8), x, 24, 8) : \n "
" _set_bits(_set_bits(_set_bits(x, y, 8, 8), z, 16, 8), w, 24, 8); \n "
" } \n \n "
" uint gen_bits(const in uint x, const in uint y, const in bool swap) \n "
" { \n "
" return (swap)? _set_bits(y, x, 8, 8) : _set_bits(x, y, 8, 8); \n "
" } \n \n "
// NOTE: (int(n) or int(n)) is broken on some NVIDIA and INTEL hardware when the sign bit is involved.
// See https://github.com/RPCS3/rpcs3/issues/8990
" vec4 sext(const in ivec4 bits) \n "
" { \n "
" // convert raw 16 bit values into signed 32-bit float4 counterpart \n "
" bvec4 sign_check = lessThan(bits, ivec4(0x8000)); \n "
" return _select(bits - 65536, bits, sign_check); \n "
" } \n \n "
" float sext(const in int bits) \n "
" { \n "
" return (bits < 0x8000) ? float(bits) : float(bits - 65536); \n "
" } \n \n "
" vec4 fetch_attribute(const in attribute_desc desc, const in int vertex_id, usamplerBuffer input_stream) \n "
" { \n "
2023-01-16 21:46:31 +01:00
" const int elem_size_table[] = { 0, 2, 4, 2, 1, 2, 4, 1 }; \n "
" const float scaling_table[] = { 1., 32767.5, 1., 1., 255., 1., 32767., 1. }; \n "
" const int elem_size = elem_size_table[desc.type]; \n "
" const vec4 scale = scaling_table[desc.type].xxxx; \n \n "
2020-12-19 12:28:10 +01:00
" uvec4 tmp, result = uvec4(0u); \n "
" vec4 ret; \n "
" int n, i = int((vertex_id * desc.stride) + desc.starting_offset); \n \n "
" for (n = 0; n < desc.attribute_size; n++) \n "
" { \n "
" tmp.x = texelFetch(input_stream, i++).x; \n "
" if (elem_size == 2) \n "
" { \n "
" tmp.y = texelFetch(input_stream, i++).x; \n "
" tmp.x = gen_bits(tmp.x, tmp.y, desc.swap_bytes); \n "
" } \n "
" else if (elem_size == 4) \n "
" { \n "
" tmp.y = texelFetch(input_stream, i++).x; \n "
" tmp.z = texelFetch(input_stream, i++).x; \n "
" tmp.w = texelFetch(input_stream, i++).x; \n "
" tmp.x = gen_bits(tmp.x, tmp.y, tmp.z, tmp.w, desc.swap_bytes); \n "
" } \n \n "
" mov(result, n, tmp.x); \n "
" } \n \n "
" // Actual decoding step is done in vector space, outside the loop \n "
" if (desc.type == VTX_FMT_SNORM16 || desc.type == VTX_FMT_SINT16) \n "
" { \n "
" ret = sext(ivec4(result)); \n "
2022-08-03 18:28:56 +02:00
" ret = fma(vec4(0.5), vec4(desc.type == VTX_FMT_SNORM16), ret); \n "
2020-12-19 12:28:10 +01:00
" } \n "
" else if (desc.type == VTX_FMT_FLOAT32) \n "
" { \n "
" ret = uintBitsToFloat(result); \n "
" } \n "
" else if (desc.type == VTX_FMT_FLOAT16) \n "
" { \n "
" tmp.x = _set_bits(result.x, result.y, 16, 16); \n "
" tmp.y = _set_bits(result.z, result.w, 16, 16); \n "
" ret.xy = unpackHalf2x16(tmp.x); \n "
" ret.zw = unpackHalf2x16(tmp.y); \n "
" } \n "
2021-06-13 22:34:01 +02:00
" else if (elem_size == 1) //(desc.type == VTX_FMT_UINT8 || desc.type == VTX_FMT_UNORM8) \n "
2020-12-19 12:28:10 +01:00
" { \n "
2021-06-13 22:34:01 +02:00
" // Ignore bswap on single byte channels \n "
" ret = vec4(result); \n "
2020-12-19 12:28:10 +01:00
" } \n "
" else //if (desc.type == VTX_FMT_COMP32) \n "
" { \n "
" result = uvec4(_get_bits(result.x, 0, 11), \n "
" _get_bits(result.x, 11, 11), \n "
" _get_bits(result.x, 22, 10), \n "
" uint(scale.x)); \n "
" ret = sext(ivec4(result) << ivec4(5, 5, 6, 0)); \n "
" } \n \n "
" if (desc.attribute_size < 4) \n "
" { \n "
" ret.w = scale.x; \n "
" } \n \n "
" return ret / scale; \n "
" } \n \n "
" attribute_desc fetch_desc(const in int location) \n "
" { \n "
" // Each descriptor is 64 bits wide \n "
" // [0-8] attribute stride \n "
" // [8-24] attribute divisor \n "
" // [24-27] attribute type \n "
" // [27-30] attribute size \n "
" // [30-31] reserved \n "
" // [32-60] starting offset \n "
" // [60-61] swap bytes flag \n "
" // [61-62] volatile flag \n "
" // [62-63] modulo enable flag \n \n " ;
if ( rules = = glsl_rules_opengl4 )
{
// Data is packed into a ubo
OS < <
" int block = (location >> 1); \n "
" int sub_block = (location & 1) << 1; \n "
" uvec2 attrib = uvec2( \n "
" ref(input_attributes_blob[block], sub_block + 0), \n "
" ref(input_attributes_blob[block], sub_block + 1)); \n \n " ;
}
else
{
// Fetch parameters streamed separately from draw parameters
OS < <
" uvec2 attrib = texelFetch(vertex_layout_stream, location + int(layout_ptr_offset)).xy; \n \n " ;
}
OS < <
" attribute_desc result; \n "
" result.stride = _get_bits(attrib.x, 0, 8); \n "
" result.frequency = _get_bits(attrib.x, 8, 16); \n "
" result.type = _get_bits(attrib.x, 24, 3); \n "
" result.attribute_size = _get_bits(attrib.x, 27, 3); \n "
" result.starting_offset = _get_bits(attrib.y, 0, 29); \n "
" result.swap_bytes = _test_bit(attrib.y, 29); \n "
" result.is_volatile = _test_bit(attrib.y, 30); \n "
" result.modulo = _test_bit(attrib.y, 31); \n "
" return result; \n "
" } \n \n "
" vec4 read_location(const in int location) \n "
" { \n "
" attribute_desc desc = fetch_desc(location); \n "
" int vertex_id = " < < vertex_id_name < < " - int(vertex_base_index); \n "
" if (desc.frequency == 0) \n "
" { \n "
" vertex_id = 0; \n "
" } \n "
" else if (desc.modulo) \n "
" { \n "
" //if a vertex modifier is active; vertex_base must be 0 and is ignored \n "
" vertex_id = ( " < < vertex_id_name < < " + int(vertex_index_offset)) % int(desc.frequency); \n "
" } \n "
" else \n "
" { \n "
" vertex_id /= int(desc.frequency); \n "
" } \n \n "
" if (desc.is_volatile) \n "
" return fetch_attribute(desc, vertex_id, volatile_input_stream); \n "
" else \n "
" return fetch_attribute(desc, vertex_id, persistent_input_stream); \n "
" } \n \n " ;
}
/// Emits the polygon stipple test. When POLYGON_STIPPLE_ENABLE_BIT is set in
/// rop_control, the generated code maps the fragment position (modulo 32x32) to a
/// bit of `stipple_pattern` and calls _kill() if that bit is not set.
void insert_rop_init(std::ostream& OS)
{
	const char* const stipple_test_source =
		" if (_test_bit(rop_control, POLYGON_STIPPLE_ENABLE_BIT)) \n "
		" { \n "
		" // Convert x,y to linear address \n "
		" const uvec2 stipple_coord = uvec2(gl_FragCoord.xy) % uvec2(32, 32); \n "
		" const uint address = stipple_coord.y * 32u + stipple_coord.x; \n "
		" const uint bit_offset = (address & 31u); \n "
		" const uint word_index = _get_bits(address, 7, 3); \n "
		" const uint sub_index = _get_bits(address, 5, 2); \n \n "
		" if (!_test_bit(stipple_pattern[word_index][sub_index], int(bit_offset))) \n "
		" { \n "
		" _kill(); \n "
		" } \n "
		" } \n \n ";

	OS << stipple_test_source;
}
void insert_rop ( std : : ostream & OS , const shader_properties & props )
{
const std : : string reg0 = props . fp32_outputs ? " r0 " : " h0 " ;
const std : : string reg1 = props . fp32_outputs ? " r2 " : " h4 " ;
const std : : string reg2 = props . fp32_outputs ? " r3 " : " h6 " ;
const std : : string reg3 = props . fp32_outputs ? " r4 " : " h8 " ;
if ( props . disable_early_discard )
{
OS < <
" if (_fragment_discard) \n "
" { \n "
" discard; \n "
2022-11-17 22:31:16 +01:00
" } \n \n " ;
2020-12-19 12:28:10 +01:00
}
2022-11-17 22:31:16 +01:00
// Pre-output stages
2020-12-19 12:28:10 +01:00
if ( ! props . fp32_outputs )
{
2022-11-17 22:31:16 +01:00
// Tested using NPUB90375; some shaders (32-bit output only?) do not obey srgb flags
const auto vtype = ( props . fp32_outputs | | ! props . supports_native_fp16 ) ? " vec4 " : " f16vec4 " ;
OS < <
" if (_test_bit(rop_control, SRGB_FRAMEBUFFER_BIT)) \n "
" { \n "
" " < < reg0 < < " = " < < vtype < < " (linear_to_srgb( " < < reg0 < < " ).rgb, " < < reg0 < < " .a); \n "
" " < < reg1 < < " = " < < vtype < < " (linear_to_srgb( " < < reg1 < < " ).rgb, " < < reg1 < < " .a); \n "
" " < < reg2 < < " = " < < vtype < < " (linear_to_srgb( " < < reg2 < < " ).rgb, " < < reg2 < < " .a); \n "
" " < < reg3 < < " = " < < vtype < < " (linear_to_srgb( " < < reg3 < < " ).rgb, " < < reg3 < < " .a); \n "
" } \n \n " ;
2020-12-19 12:28:10 +01:00
}
2022-11-17 22:31:16 +01:00
// Output conversion
if ( props . ROP_output_rounding )
2020-12-19 12:28:10 +01:00
{
2022-11-17 22:31:16 +01:00
OS < <
" if (_test_bit(rop_control, INT_FRAMEBUFFER_BIT)) \n "
" { \n "
" " < < reg0 < < " = round_to_8bit( " < < reg0 < < " ); \n "
" " < < reg1 < < " = round_to_8bit( " < < reg1 < < " ); \n "
" " < < reg2 < < " = round_to_8bit( " < < reg2 < < " ); \n "
" " < < reg3 < < " = round_to_8bit( " < < reg3 < < " ); \n "
" } \n \n " ;
2020-12-19 12:28:10 +01:00
}
2022-11-17 22:31:16 +01:00
// Post-output stages
// TODO: Implement all ROP options like CSAA and ALPHA_TO_ONE here
2020-12-19 12:28:10 +01:00
OS < <
2022-11-17 22:31:16 +01:00
// Alpha Testing
" if (_test_bit(rop_control, ALPHA_TEST_ENABLE_BIT)) \n "
" { \n "
" const uint alpha_func = _get_bits(rop_control, ALPHA_TEST_FUNC_OFFSET, ALPHA_TEST_FUNC_LENGTH); \n "
" if (!comparison_passes( " < < reg0 < < " .a, alpha_ref, alpha_func)) discard; \n "
" } \n \n " ;
2020-12-19 12:28:10 +01:00
2022-11-17 22:31:16 +01:00
// ALPHA_TO_COVERAGE
2020-12-19 12:28:10 +01:00
if ( props . emulate_coverage_tests )
{
OS < <
2022-11-17 22:31:16 +01:00
" if (_test_bit(rop_control, ALPHA_TO_COVERAGE_ENABLE_BIT)) \n "
" { \n "
" if (!_test_bit(rop_control, MSAA_WRITE_ENABLE_BIT) || \n "
" !coverage_test_passes( " < < reg0 < < " )) \n "
2020-12-19 12:28:10 +01:00
" { \n "
" discard; \n "
2022-11-17 22:31:16 +01:00
" } \n "
" } \n \n " ;
2020-12-19 12:28:10 +01:00
}
2022-11-17 22:31:16 +01:00
// Commit
2020-12-19 12:28:10 +01:00
OS < <
" ocol0 = " < < reg0 < < " ; \n "
" ocol1 = " < < reg1 < < " ; \n "
" ocol2 = " < < reg2 < < " ; \n "
" ocol3 = " < < reg3 < < " ; \n \n " ;
}
void insert_glsl_legacy_function ( std : : ostream & OS , const shader_properties & props )
{
OS < < " #define _select mix \n " ;
OS < < " #define _saturate(x) clamp(x, 0., 1.) \n " ;
OS < < " #define _get_bits(x, off, count) bitfieldExtract(x, off, count) \n " ;
OS < < " #define _set_bits(x, y, off, count) bitfieldInsert(x, y, off, count) \n " ;
OS < < " #define _test_bit(x, y) (_get_bits(x, y, 1) != 0) \n " ;
OS < < " #define _rand(seed) fract(sin(dot(seed.xy, vec2(12.9898f, 78.233f))) * 43758.5453f) \n \n " ;
2022-04-24 10:56:42 +02:00
if ( props . low_precision_tests )
{
OS < < " #define CMP_FIXUP(a) (sign(a) * 16. + a) \n \n " ;
}
else
{
OS < < " #define CMP_FIXUP(a) (a) \n \n " ;
}
2020-12-19 12:28:10 +01:00
if ( props . domain = = glsl : : program_domain : : glsl_fragment_program )
{
2022-11-16 15:37:49 +01:00
OS < < " // ROP control \n " ;
OS < < " #define ALPHA_TEST_ENABLE_BIT " < < rsx : : ROP_control_bits : : ALPHA_TEST_ENABLE_BIT < < " \n " ;
OS < < " #define SRGB_FRAMEBUFFER_BIT " < < rsx : : ROP_control_bits : : SRGB_FRAMEBUFFER_BIT < < " \n " ;
OS < < " #define ALPHA_TO_COVERAGE_ENABLE_BIT " < < rsx : : ROP_control_bits : : ALPHA_TO_COVERAGE_ENABLE_BIT < < " \n " ;
OS < < " #define MSAA_WRITE_ENABLE_BIT " < < rsx : : ROP_control_bits : : MSAA_WRITE_ENABLE_BIT < < " \n " ;
OS < < " #define INT_FRAMEBUFFER_BIT " < < rsx : : ROP_control_bits : : INT_FRAMEBUFFER_BIT < < " \n " ;
OS < < " #define POLYGON_STIPPLE_ENABLE_BIT " < < rsx : : ROP_control_bits : : POLYGON_STIPPLE_ENABLE_BIT < < " \n " ;
OS < < " #define ALPHA_TEST_FUNC_OFFSET " < < rsx : : ROP_control_bits : : ALPHA_FUNC_OFFSET < < " \n " ;
OS < < " #define ALPHA_TEST_FUNC_LENGTH " < < rsx : : ROP_control_bits : : ALPHA_FUNC_NUM_BITS < < " \n " ;
OS < < " #define MSAA_SAMPLE_CTRL_OFFSET " < < rsx : : ROP_control_bits : : MSAA_SAMPLE_CTRL_OFFSET < < " \n " ;
OS < < " #define MSAA_SAMPLE_CTRL_LENGTH " < < rsx : : ROP_control_bits : : MSAA_SAMPLE_CTRL_NUM_BITS < < " \n " ;
OS < < " #define ROP_CMD_MASK " < < rsx : : ROP_control_bits : : ROP_CMD_MASK < < " \n \n " ;
// 8-bit rounding/quantization
{
2022-11-21 14:44:59 +01:00
const auto _16bit_outputs = ( ! props . fp32_outputs & & props . supports_native_fp16 ) ;
const auto _255 = _16bit_outputs ? " f16vec4(255.) " : " vec4(255.) " ;
const auto _1_over_2 = _16bit_outputs ? " f16vec4(0.5) " : " vec4(0.5) " ;
2022-11-17 22:31:16 +01:00
OS < < " #define round_to_8bit(v4) (floor(fma(v4, " < < _255 < < " , " < < _1_over_2 < < " )) / " < < _255 < < " ) \n \n " ;
2022-11-16 15:37:49 +01:00
}
OS < < " // Workaround for broken early discard in some drivers \n " ;
2020-12-19 12:28:10 +01:00
if ( props . disable_early_discard )
{
OS < < " bool _fragment_discard = false; \n " ;
OS < < " #define _kill() _fragment_discard = true \n \n " ;
}
else
{
OS < < " #define _kill() discard \n \n " ;
}
if ( props . require_texture_ops )
{
// Declare special texture control flags
2022-09-29 11:04:38 +02:00
OS < < " #define GAMMA_R_MASK (1 << " < < rsx : : texture_control_bits : : GAMMA_R < < " ) \n " ;
OS < < " #define GAMMA_G_MASK (1 << " < < rsx : : texture_control_bits : : GAMMA_G < < " ) \n " ;
OS < < " #define GAMMA_B_MASK (1 << " < < rsx : : texture_control_bits : : GAMMA_B < < " ) \n " ;
OS < < " #define GAMMA_A_MASK (1 << " < < rsx : : texture_control_bits : : GAMMA_A < < " ) \n " ;
OS < < " #define EXPAND_R_MASK (1 << " < < rsx : : texture_control_bits : : EXPAND_R < < " ) \n " ;
OS < < " #define EXPAND_G_MASK (1 << " < < rsx : : texture_control_bits : : EXPAND_G < < " ) \n " ;
OS < < " #define EXPAND_B_MASK (1 << " < < rsx : : texture_control_bits : : EXPAND_B < < " ) \n " ;
OS < < " #define EXPAND_A_MASK (1 << " < < rsx : : texture_control_bits : : EXPAND_A < < " ) \n \n " ;
2022-11-16 15:37:49 +01:00
OS < < " #define ALPHAKILL " < < rsx : : texture_control_bits : : ALPHAKILL < < " \n " ;
OS < < " #define RENORMALIZE " < < rsx : : texture_control_bits : : RENORMALIZE < < " \n " ;
2022-09-29 11:04:38 +02:00
OS < < " #define DEPTH_FLOAT " < < rsx : : texture_control_bits : : DEPTH_FLOAT < < " \n " ;
OS < < " #define DEPTH_COMPARE " < < rsx : : texture_control_bits : : DEPTH_COMPARE_OP < < " \n " ;
OS < < " #define FILTERED_MAG_BIT " < < rsx : : texture_control_bits : : FILTERED_MAG < < " \n " ;
OS < < " #define FILTERED_MIN_BIT " < < rsx : : texture_control_bits : : FILTERED_MIN < < " \n " ;
2022-11-16 15:37:49 +01:00
OS < < " #define INT_COORDS_BIT " < < rsx : : texture_control_bits : : UNNORMALIZED_COORDS < < " \n " ;
2022-09-29 11:04:38 +02:00
OS < < " #define GAMMA_CTRL_MASK (GAMMA_R_MASK|GAMMA_G_MASK|GAMMA_B_MASK|GAMMA_A_MASK) \n " ;
OS < < " #define SIGN_EXPAND_MASK (EXPAND_R_MASK|EXPAND_G_MASK|EXPAND_B_MASK|EXPAND_A_MASK) \n " ;
OS < < " #define FILTERED_MASK (FILTERED_MAG_BIT|FILTERED_MIN_BIT) \n \n " ;
2020-12-19 12:28:10 +01:00
}
}
if ( props . require_lit_emulation )
{
OS < <
" vec4 lit_legacy(const in vec4 val) "
" { \n "
" vec4 clamped_val = val; \n "
" clamped_val.x = max(val.x, 0.); \n "
" clamped_val.y = max(val.y, 0.); \n "
" vec4 result; \n "
" result.x = 1.; \n "
" result.w = 1.; \n "
" result.y = clamped_val.x; \n "
" result.z = clamped_val.x > 0. ? exp(clamped_val.w * log(max(clamped_val.y, 0.0000000001))) : 0.; \n "
" return result; \n "
" } \n \n " ;
}
2022-10-06 02:20:09 +02:00
if ( props . domain = = glsl : : program_domain : : glsl_vertex_program )
2020-12-19 12:28:10 +01:00
{
2022-10-06 02:20:09 +02:00
if ( props . require_explicit_invariance )
2020-12-19 12:28:10 +01:00
{
2022-10-06 02:20:09 +02:00
// PS3 has shader invariance, but we don't really care about most attributes outside ATTR0
OS < < " invariant gl_Position; \n \n " ;
2020-12-19 12:28:10 +01:00
}
2022-10-06 02:20:09 +02:00
if ( props . emulate_zclip_transform )
2020-12-19 12:28:10 +01:00
{
2022-10-06 02:20:09 +02:00
if ( props . emulate_depth_clip_only )
{
// Technically the depth value here is the 'final' depth that should be stored in the Z buffer.
// Forward mapping eqn is d' = d * (f - n) + n, where d' is the stored Z value (this) and d is the normalized API value.
OS < <
" vec4 apply_zclip_xform(const in vec4 pos, const in float near_plane, const in float far_plane) \n "
" { \n "
" if (pos.w != 0.0) \n "
" { \n "
" const float real_n = min(far_plane, near_plane); \n "
" const float real_f = max(far_plane, near_plane); \n "
" const double depth_range = double(real_f - real_n); \n "
" const double inv_range = (depth_range > 0.000001) ? (1.0 / (depth_range * pos.w)) : 0.0; \n "
" const double actual_d = (double(pos.z) - double(real_n * pos.w)) * inv_range; \n "
" const double nearest_d = floor(actual_d + 0.5); \n "
" const double epsilon = (inv_range * pos.w) / 16777215.; \n " // Epsilon value is the minimum discernable change in Z that should affect the stored Z
" const double d = _select(actual_d, nearest_d, abs(actual_d - nearest_d) < epsilon); \n "
" return vec4(pos.xy, float(d * pos.w), pos.w); \n "
" } \n "
" else \n "
" { \n "
" return pos; \n " // Only values where Z=0 can ever pass this clip
" } \n "
" } \n \n " ;
}
else
{
OS < <
" vec4 apply_zclip_xform(const in vec4 pos, const in float near_plane, const in float far_plane) \n "
" { \n "
" float d = float(pos.z / pos.w); \n "
" if (d < 0.f && d >= near_plane) \n "
" { \n "
" // Clamp \n "
" d = 0.f; \n "
" } \n "
" else if (d > 1.f && d <= far_plane) \n "
" { \n "
" // Compress Z and store towards highest end of the range \n "
" d = min(1., 0.99 + (0.01 * (pos.z - near_plane) / (far_plane - near_plane))); \n "
" } \n "
" else \n " // This catch-call also handles w=0 since d=inf
" { \n "
" return pos; \n "
" } \n "
" \n "
" return vec4(pos.x, pos.y, d * pos.w, pos.w); \n "
" } \n \n " ;
}
2020-12-19 12:28:10 +01:00
}
return ;
}
2022-04-24 10:56:42 +02:00
program_common : : insert_compare_op ( OS ) ;
2020-12-19 12:28:10 +01:00
if ( props . emulate_coverage_tests )
{
// Purely stochastic
OS < <
2022-11-16 15:37:49 +01:00
" bool coverage_test_passes(const in vec4 _sample) \n "
2020-12-19 12:28:10 +01:00
" { \n "
" float random = _rand(gl_FragCoord); \n "
" return (_sample.a > random); \n "
" } \n \n " ;
}
2021-06-05 01:40:39 +02:00
if ( ! props . fp32_outputs | | props . require_linear_to_srgb )
2020-12-19 12:28:10 +01:00
{
OS < <
" vec4 linear_to_srgb(const in vec4 cl) \n "
" { \n "
" vec4 low = cl * 12.92; \n "
" vec4 high = 1.055 * pow(cl, vec4(1. / 2.4)) - 0.055; \n "
2021-12-12 21:35:56 +01:00
" bvec4 selection = lessThan(cl, vec4(0.0031308)); \n "
" return clamp(mix(high, low, selection), 0., 1.); \n "
2020-12-19 12:28:10 +01:00
" } \n \n " ;
}
2021-06-05 01:40:39 +02:00
if ( props . require_texture_ops | | props . require_srgb_to_linear )
{
OS < <
" vec4 srgb_to_linear(const in vec4 cs) \n "
" { \n "
" vec4 a = cs / 12.92; \n "
" vec4 b = pow((cs + 0.055) / 1.055, vec4(2.4)); \n "
" return _select(a, b, greaterThan(cs, vec4(0.04045))); \n "
" } \n \n " ;
}
2020-12-19 12:28:10 +01:00
if ( props . require_depth_conversion )
{
ensure ( props . require_texture_ops ) ;
//NOTE: Memory layout is fetched as byteswapped BGRA [GBAR] (GOW collection, DS2, DeS)
//The A component (Z) is useless (should contain stencil8 or just 1)
OS < <
" vec4 decode_depth24(const in float depth_value, const in bool depth_float) \n "
" { \n "
" uint value; \n "
" if (!depth_float) \n "
" value = uint(depth_value * 16777215.); \n "
" else \n "
" value = _get_bits(floatBitsToUint(depth_value), 7, 24); \n "
" \n "
" uint b = _get_bits(value, 0, 8); \n "
" uint g = _get_bits(value, 8, 8); \n "
" uint r = _get_bits(value, 16, 8); \n "
" return vec4(float(g)/255., float(b)/255., 1., float(r)/255.); \n "
" } \n \n "
" vec4 remap_vector(const in vec4 color, const in uint remap) \n "
" { \n "
" vec4 result; \n "
" if (_get_bits(remap, 0, 8) == 0xE4) \n "
" { \n "
" result = color; \n "
" } \n "
" else \n "
" { \n "
" uvec4 remap_channel = uvec4(remap) >> uvec4(2, 4, 6, 0); \n "
" remap_channel &= 3; \n "
" remap_channel = (remap_channel + 3) % 4; // Map A-R-G-B to R-G-B-A \n \n "
" // Generate remapped result \n "
" result.a = color[remap_channel.a]; \n "
" result.r = color[remap_channel.r]; \n "
" result.g = color[remap_channel.g]; \n "
" result.b = color[remap_channel.b]; \n "
" } \n \n "
" if (_get_bits(remap, 8, 8) == 0xAA) \n "
" return result; \n \n "
" uvec4 remap_select = uvec4(remap) >> uvec4(10, 12, 14, 8); \n "
" remap_select &= 3; \n "
" bvec4 choice = lessThan(remap_select, uvec4(2)); \n "
" return _select(result, vec4(remap_select), choice); \n "
" } \n \n "
2021-10-05 19:33:58 +02:00
" vec4 convert_z24x8_to_rgba8(const in vec2 depth_stencil, const in uint remap, const in uint flags) \n "
2020-12-19 12:28:10 +01:00
" { \n "
2021-10-05 19:33:58 +02:00
" vec4 result = decode_depth24(depth_stencil.x, _test_bit(flags, DEPTH_FLOAT)); \n "
" result.z = depth_stencil.y / 255.; \n \n "
2020-12-19 12:28:10 +01:00
" if (remap == 0xAAE4) \n "
" return result; \n \n "
" return remap_vector(result, remap); \n "
" } \n \n " ;
}
if ( props . require_texture_ops )
{
OS < <
//TODO: Move all the texture read control operations here
" vec4 process_texel(in vec4 rgba, const in uint control_bits) \n "
" { \n "
" if (control_bits == 0) \n "
" { \n "
" return rgba; \n "
" } \n "
" \n "
" if (_test_bit(control_bits, ALPHAKILL)) \n "
" { \n "
" // Alphakill \n "
" if (rgba.a < 0.000001) \n "
" { \n "
" _kill(); \n "
" return rgba; \n "
" } \n "
" } \n "
" \n "
" if (_test_bit(control_bits, RENORMALIZE)) \n "
" { \n "
" // Renormalize to 8-bit (PS3) accuracy \n "
" rgba = floor(rgba * 255.); \n "
2022-03-30 01:19:17 +02:00
" rgba /= 255.; \n "
2020-12-19 12:28:10 +01:00
" } \n "
" \n "
" uvec4 mask; \n "
" vec4 convert; \n "
2022-06-12 19:05:43 +02:00
" uint op_mask = control_bits & uint(SIGN_EXPAND_MASK); \n "
2020-12-19 12:28:10 +01:00
" \n "
" if (op_mask != 0) \n "
" { \n "
" // Expand to signed normalized \n "
" mask = uvec4(op_mask) & uvec4(EXPAND_R_MASK, EXPAND_G_MASK, EXPAND_B_MASK, EXPAND_A_MASK); \n "
" convert = (rgba * 2.f - 1.f); \n "
" rgba = _select(rgba, convert, notEqual(mask, uvec4(0))); \n "
" } \n "
" \n "
2022-06-12 19:05:43 +02:00
" op_mask = control_bits & uint(GAMMA_CTRL_MASK); \n "
2020-12-19 12:28:10 +01:00
" if (op_mask != 0u) \n "
" { \n "
" // Gamma correction \n "
" mask = uvec4(op_mask) & uvec4(GAMMA_R_MASK, GAMMA_G_MASK, GAMMA_B_MASK, GAMMA_A_MASK); \n "
" convert = srgb_to_linear(rgba); \n "
" return _select(rgba, convert, notEqual(mask, uvec4(0))); \n "
" } \n "
" \n "
" return rgba; \n "
" } \n \n " ;
if ( props . require_texture_expand )
{
OS < <
" uint _texture_flag_override = 0; \n "
" #define _enable_texture_expand() _texture_flag_override = SIGN_EXPAND_MASK \n "
" #define _disable_texture_expand() _texture_flag_override = 0 \n "
" #define TEX_FLAGS(index) (texture_parameters[index].flags | _texture_flag_override) \n " ;
}
else
{
OS < <
" #define TEX_FLAGS(index) texture_parameters[index].flags \n " ;
}
OS < <
" #define TEX_NAME(index) tex##index \n "
" #define TEX_NAME_STENCIL(index) tex##index##_stencil \n \n "
2021-07-31 16:27:16 +02:00
" #define COORD_SCALE1(index, coord1) ((coord1 + texture_parameters[index].scale_bias.w) * texture_parameters[index].scale_bias.x) \n "
" #define COORD_SCALE2(index, coord2) ((coord2 + texture_parameters[index].scale_bias.w) * texture_parameters[index].scale_bias.xy) \n "
" #define COORD_SCALE3(index, coord3) ((coord3 + texture_parameters[index].scale_bias.w) * texture_parameters[index].scale_bias.xyz) \n \n "
2020-12-19 12:28:10 +01:00
2021-07-31 16:27:16 +02:00
" #define TEX1D(index, coord1) process_texel(texture(TEX_NAME(index), COORD_SCALE1(index, coord1)), TEX_FLAGS(index)) \n "
" #define TEX1D_BIAS(index, coord1, bias) process_texel(texture(TEX_NAME(index), COORD_SCALE1(index, coord1), bias), TEX_FLAGS(index)) \n "
" #define TEX1D_LOD(index, coord1, lod) process_texel(textureLod(TEX_NAME(index), COORD_SCALE1(index, coord1), lod), TEX_FLAGS(index)) \n "
" #define TEX1D_GRAD(index, coord1, dpdx, dpdy) process_texel(textureGrad(TEX_NAME(index), COORD_SCALE1(index, coord1), dpdx, dpdy), TEX_FLAGS(index)) \n "
2021-10-12 11:16:38 +02:00
" #define TEX1D_PROJ(index, coord4) process_texel(textureProj(TEX_NAME(index), vec2(COORD_SCALE1(index, coord4.x), coord4.w)), TEX_FLAGS(index)) \n "
2020-12-19 12:28:10 +01:00
2021-07-31 16:27:16 +02:00
" #define TEX2D(index, coord2) process_texel(texture(TEX_NAME(index), COORD_SCALE2(index, coord2)), TEX_FLAGS(index)) \n "
" #define TEX2D_BIAS(index, coord2, bias) process_texel(texture(TEX_NAME(index), COORD_SCALE2(index, coord2), bias), TEX_FLAGS(index)) \n "
" #define TEX2D_LOD(index, coord2, lod) process_texel(textureLod(TEX_NAME(index), COORD_SCALE2(index, coord2), lod), TEX_FLAGS(index)) \n "
" #define TEX2D_GRAD(index, coord2, dpdx, dpdy) process_texel(textureGrad(TEX_NAME(index), COORD_SCALE2(index, coord2), dpdx, dpdy), TEX_FLAGS(index)) \n "
2021-10-05 19:33:58 +02:00
" #define TEX2D_PROJ(index, coord4) process_texel(textureProj(TEX_NAME(index), vec4(COORD_SCALE2(index, coord4.xy), coord4.z, coord4.w)), TEX_FLAGS(index)) \n \n " ;
2020-12-19 12:28:10 +01:00
if ( props . emulate_shadow_compare )
{
OS < <
2021-07-31 16:27:16 +02:00
" #define SHADOW_COORD(index, coord3) vec3(COORD_SCALE2(index, coord3.xy), _test_bit(TEX_FLAGS(index), DEPTH_FLOAT)? coord3.z : min(float(coord3.z), 1.0)) \n "
" #define SHADOW_COORD4(index, coord4) vec4(SHADOW_COORD(index, coord4.xyz), coord4.w) \n "
" #define SHADOW_COORD_PROJ(index, coord4) vec4(COORD_SCALE2(index, coord4.xy), _test_bit(TEX_FLAGS(index), DEPTH_FLOAT)? coord4.z : min(coord4.z, coord4.w), coord4.w) \n \n "
" #define TEX2D_SHADOW(index, coord3) texture(TEX_NAME(index), SHADOW_COORD(index, coord3)) \n "
2021-10-05 19:33:58 +02:00
" #define TEX3D_SHADOW(index, coord4) texture(TEX_NAME(index), SHADOW_COORD4(index, coord4)) \n "
2021-07-31 16:27:16 +02:00
" #define TEX2D_SHADOWPROJ(index, coord4) textureProj(TEX_NAME(index), SHADOW_COORD_PROJ(index, coord4)) \n " ;
2020-12-19 12:28:10 +01:00
}
else
{
OS < <
2021-07-31 16:27:16 +02:00
" #define TEX2D_SHADOW(index, coord3) texture(TEX_NAME(index), vec3(COORD_SCALE2(index, coord3.xy), coord3.z)) \n "
2021-10-05 19:33:58 +02:00
" #define TEX3D_SHADOW(index, coord4) texture(TEX_NAME(index), vec4(COORD_SCALE3(index, coord4.xyz), coord4.w)) \n "
2021-08-06 23:49:50 +02:00
" #define TEX2D_SHADOWPROJ(index, coord4) textureProj(TEX_NAME(index), vec4(COORD_SCALE2(index, coord4.xy), coord4.zw)) \n " ;
2020-12-19 12:28:10 +01:00
}
OS < <
2021-07-31 16:27:16 +02:00
" #define TEX3D(index, coord3) process_texel(texture(TEX_NAME(index), COORD_SCALE3(index, coord3)), TEX_FLAGS(index)) \n "
" #define TEX3D_BIAS(index, coord3, bias) process_texel(texture(TEX_NAME(index), COORD_SCALE3(index, coord3), bias), TEX_FLAGS(index)) \n "
" #define TEX3D_LOD(index, coord3, lod) process_texel(textureLod(TEX_NAME(index), COORD_SCALE3(index, coord3), lod), TEX_FLAGS(index)) \n "
" #define TEX3D_GRAD(index, coord3, dpdx, dpdy) process_texel(textureGrad(TEX_NAME(index), COORD_SCALE3(index, coord3), dpdx, dpdy), TEX_FLAGS(index)) \n "
2021-10-12 11:16:38 +02:00
" #define TEX3D_PROJ(index, coord4) process_texel(texture(TEX_NAME(index), COORD_SCALE3(index, coord4.xyz) / coord4.w), TEX_FLAGS(index)) \n \n " ;
2021-10-05 19:33:58 +02:00
if ( props . require_depth_conversion )
{
OS < <
" #define ZS_READ(index, coord) vec2(texture(TEX_NAME(index), coord).r, float(texture(TEX_NAME_STENCIL(index), coord).x)) \n "
" #define TEX1D_Z24X8_RGBA8(index, coord1) process_texel(convert_z24x8_to_rgba8(ZS_READ(index, COORD_SCALE1(index, coord1)), texture_parameters[index].remap, TEX_FLAGS(index)), TEX_FLAGS(index)) \n "
" #define TEX2D_Z24X8_RGBA8(index, coord2) process_texel(convert_z24x8_to_rgba8(ZS_READ(index, COORD_SCALE2(index, coord2)), texture_parameters[index].remap, TEX_FLAGS(index)), TEX_FLAGS(index)) \n "
" #define TEX3D_Z24X8_RGBA8(index, coord3) process_texel(convert_z24x8_to_rgba8(ZS_READ(index, COORD_SCALE3(index, coord3)), texture_parameters[index].remap, TEX_FLAGS(index)), TEX_FLAGS(index)) \n \n " ;
}
2022-03-13 09:32:04 +01:00
if ( props . require_msaa_ops )
{
OS < <
" #define ZCOMPARE_FUNC(index) _get_bits(TEX_FLAGS(index), DEPTH_COMPARE, 3) \n "
" #define ZS_READ_MS(index, coord) vec2(sampleTexture2DMS(TEX_NAME(index), coord, index).r, float(sampleTexture2DMS(TEX_NAME_STENCIL(index), coord, index).x)) \n "
" #define TEX2D_MS(index, coord2) process_texel(sampleTexture2DMS(TEX_NAME(index), coord2, index), TEX_FLAGS(index)) \n "
" #define TEX2D_SHADOW_MS(index, coord3) vec4(comparison_passes(sampleTexture2DMS(TEX_NAME(index), coord3.xy, index).x, coord3.z, ZCOMPARE_FUNC(index))) \n "
" #define TEX2D_SHADOWPROJ_MS(index, coord4) TEX2D_SHADOW_MS(index, (coord4.xyz / coord4.w)) \n "
" #define TEX2D_Z24X8_RGBA8_MS(index, coord2) process_texel(convert_z24x8_to_rgba8(ZS_READ_MS(index, coord2), texture_parameters[index].remap, TEX_FLAGS(index)), TEX_FLAGS(index)) \n \n " ;
2022-03-31 22:13:35 +02:00
OS < <
2022-04-01 20:53:25 +02:00
" vec3 compute2x2DownsampleWeights(const in float coord, const in float uv_step, const in float actual_step) "
" { \n "
" const float next_sample_point = coord + actual_step; \n "
" const float next_coord_step = fma(floor(coord / uv_step), uv_step, uv_step); \n "
" const float next_coord_step_plus_one = next_coord_step + uv_step; \n "
" vec3 weights = vec3(next_coord_step, min(next_coord_step_plus_one, next_sample_point), max(next_coord_step_plus_one, next_sample_point)) - vec3(coord, next_coord_step, next_coord_step_plus_one); \n "
" return weights / actual_step; \n "
" } \n \n " ;
2022-03-31 22:13:35 +02:00
2022-03-13 09:32:04 +01:00
auto insert_msaa_sample_code = [ & OS ] ( const std : : string_view & sampler_type )
{
OS < <
2022-04-01 20:53:25 +02:00
" vec4 texelFetch2DMS(in " < < sampler_type < < " tex, const in vec2 sample_count, const in ivec2 icoords, const in int index, const in ivec2 offset) \n "
2022-03-13 09:32:04 +01:00
" { \n "
2022-04-01 20:53:25 +02:00
" const vec2 resolve_coords = vec2(icoords + offset); \n "
" const vec2 aa_coords = floor(resolve_coords / sample_count); \n " // AA coords = real_coords / sample_count
" const vec2 sample_loc = fma(aa_coords, -sample_count, resolve_coords); \n " // Sample ID = real_coords % sample_count
" const float sample_index = fma(sample_loc.y, sample_count.y, sample_loc.x); \n "
" return texelFetch(tex, ivec2(aa_coords), int(sample_index)); \n "
2022-03-30 01:19:17 +02:00
" } \n \n "
" vec4 sampleTexture2DMS(in " < < sampler_type < < " tex, const in vec2 coords, const in int index) \n "
" { \n "
" const uint flags = TEX_FLAGS(index); \n "
" const vec2 normalized_coords = COORD_SCALE2(index, coords); \n "
2022-04-01 20:53:25 +02:00
" const vec2 sample_count = vec2(2., textureSamples(tex) * 0.5); \n "
" const vec2 image_size = textureSize(tex) * sample_count; \n "
2022-03-30 01:19:17 +02:00
" const ivec2 icoords = ivec2(normalized_coords * image_size); \n "
" const vec4 sample0 = texelFetch2DMS(tex, sample_count, icoords, index, ivec2(0)); \n "
" \n "
2022-04-01 20:53:25 +02:00
" if (_get_bits(flags, FILTERED_MAG_BIT, 2) == 0) \n "
2022-03-30 01:19:17 +02:00
" { \n "
" return sample0; \n "
" } \n "
" \n "
2022-03-30 21:09:13 +02:00
" // Bilinear scaling, with upto 2x2 downscaling with simple weights \n "
2022-03-30 01:19:17 +02:00
" const vec2 uv_step = 1.0 / vec2(image_size); \n "
" const vec2 actual_step = vec2(dFdx(normalized_coords.x), dFdy(normalized_coords.y)); \n "
2022-03-30 21:09:13 +02:00
" \n "
2022-04-01 20:53:25 +02:00
" const bvec2 no_filter = lessThan(abs(uv_step - actual_step), vec2(0.000001)); \n "
" if (no_filter.x && no_filter.y) \n "
2022-03-30 21:09:13 +02:00
" { \n "
" return sample0; \n "
" } \n "
" \n "
" vec4 a, b; \n "
" float factor; \n "
2022-04-01 20:53:25 +02:00
" const vec4 sample2 = texelFetch2DMS(tex, sample_count, icoords, index, ivec2(0, 1)); // Top left \n "
2022-03-30 21:09:13 +02:00
" \n "
2022-04-01 20:53:25 +02:00
" if (no_filter.x) \n "
2022-03-30 21:09:13 +02:00
" { \n "
" // No scaling, 1:1 \n "
" a = sample0; \n "
" b = sample2; \n "
" } \n "
2022-04-01 20:53:25 +02:00
" else \n "
" { \n "
" // Filter required, sample more data \n "
" const vec4 sample1 = texelFetch2DMS(tex, sample_count, icoords, index, ivec2(1, 0)); // Bottom right \n "
" const vec4 sample3 = texelFetch2DMS(tex, sample_count, icoords, index, ivec2(1, 1)); // Top right \n "
" \n "
" if (actual_step.x > uv_step.x) \n "
" { \n "
" // Downscale in X, centered \n "
" const vec3 weights = compute2x2DownsampleWeights(normalized_coords.x, uv_step.x, actual_step.x); \n "
" \n "
" const vec4 sample4 = texelFetch2DMS(tex, sample_count, icoords, index, ivec2(2, 0)); // Further bottom right \n "
" a = fma(sample0, weights.xxxx, sample1 * weights.y) + (sample4 * weights.z); // Weighted sum \n "
" \n "
" if (!no_filter.y) \n "
" { \n "
" const vec4 sample5 = texelFetch2DMS(tex, sample_count, icoords, index, ivec2(2, 1)); // Further top right \n "
" b = fma(sample2, weights.xxxx, sample3 * weights.y) + (sample5 * weights.z); // Weighted sum \n "
" } \n "
" } \n "
" else if (actual_step.x < uv_step.x) \n "
" { \n "
" // Upscale in X \n "
" factor = fract(normalized_coords.x * image_size.x); \n "
" a = mix(sample0, sample1, factor); \n "
" b = mix(sample2, sample3, factor); \n "
" } \n "
" } \n "
2022-03-30 21:09:13 +02:00
" \n "
2022-04-01 20:53:25 +02:00
" if (no_filter.y) \n "
" { \n "
" // 1:1 no scale \n "
" return a; \n "
" } \n "
" else if (actual_step.y > uv_step.y) \n "
2022-03-30 21:09:13 +02:00
" { \n "
" // Downscale in Y \n "
2022-03-31 22:13:35 +02:00
" const vec3 weights = compute2x2DownsampleWeights(normalized_coords.y, uv_step.y, actual_step.y); \n "
" // We only have 2 rows computed for performance reasons, so combine rows 1 and 2 \n "
" return a * weights.x + b * (weights.y + weights.z); \n "
2022-03-30 21:09:13 +02:00
" } \n "
" else if (actual_step.y < uv_step.y) \n "
" { \n "
" // Upscale in Y \n "
2022-03-30 21:11:28 +02:00
" factor = fract(normalized_coords.y * image_size.y); \n "
2022-03-30 21:09:13 +02:00
" return mix(a, b, factor); \n "
" } \n "
2022-03-13 09:32:04 +01:00
" } \n \n " ;
} ;
insert_msaa_sample_code ( " sampler2DMS " ) ;
if ( props . require_depth_conversion )
{
insert_msaa_sample_code ( " usampler2DMS " ) ;
}
}
2020-12-19 12:28:10 +01:00
}
if ( props . require_wpos )
{
OS < <
" vec4 get_wpos() \n "
" { \n "
" float abs_scale = abs(wpos_scale); \n "
" return (gl_FragCoord * vec4(abs_scale, wpos_scale, 1., 1.)) + vec4(0., wpos_bias, 0., 0.); \n "
" } \n \n " ;
}
}
std : : string getFunctionImpl ( FUNCTION f )
{
switch ( f )
{
default :
abort ( ) ;
2021-10-05 19:33:58 +02:00
case FUNCTION : : DP2 :
2020-12-19 12:28:10 +01:00
return " $Ty(dot($0.xy, $1.xy)) " ;
2021-10-05 19:33:58 +02:00
case FUNCTION : : DP2A :
2020-12-19 12:28:10 +01:00
return " $Ty(dot($0.xy, $1.xy) + $2.x) " ;
2021-10-05 19:33:58 +02:00
case FUNCTION : : DP3 :
2020-12-19 12:28:10 +01:00
return " $Ty(dot($0.xyz, $1.xyz)) " ;
2021-10-05 19:33:58 +02:00
case FUNCTION : : DP4 :
2020-12-19 12:28:10 +01:00
return " $Ty(dot($0, $1)) " ;
2021-10-05 19:33:58 +02:00
case FUNCTION : : DPH :
2020-12-19 12:28:10 +01:00
return " $Ty(dot(vec4($0.xyz, 1.0), $1)) " ;
2021-10-05 19:33:58 +02:00
case FUNCTION : : SFL :
2020-12-19 12:28:10 +01:00
return " $Ty(0.) " ;
2021-10-05 19:33:58 +02:00
case FUNCTION : : STR :
2020-12-19 12:28:10 +01:00
return " $Ty(1.) " ;
2021-10-05 19:33:58 +02:00
case FUNCTION : : FRACT :
2020-12-19 12:28:10 +01:00
return " fract($0) " ;
2021-10-05 19:33:58 +02:00
case FUNCTION : : REFL :
2020-12-19 12:28:10 +01:00
return " reflect($0, $1) " ;
2021-10-05 19:33:58 +02:00
case FUNCTION : : TEXTURE_SAMPLE1D :
2020-12-19 12:28:10 +01:00
return " TEX1D($_i, $0.x) " ;
2021-10-05 19:33:58 +02:00
case FUNCTION : : TEXTURE_SAMPLE1D_BIAS :
2020-12-19 12:28:10 +01:00
return " TEX1D_BIAS($_i, $0.x, $1.x) " ;
2021-10-05 19:33:58 +02:00
case FUNCTION : : TEXTURE_SAMPLE1D_PROJ :
return " TEX1D_PROJ($_i, $0) " ;
case FUNCTION : : TEXTURE_SAMPLE1D_LOD :
2020-12-19 12:28:10 +01:00
return " TEX1D_LOD($_i, $0.x, $1.x) " ;
2021-10-05 19:33:58 +02:00
case FUNCTION : : TEXTURE_SAMPLE1D_GRAD :
2020-12-19 12:28:10 +01:00
return " TEX1D_GRAD($_i, $0.x, $1.x, $2.x) " ;
2021-10-05 19:33:58 +02:00
case FUNCTION : : TEXTURE_SAMPLE1D_SHADOW :
case FUNCTION : : TEXTURE_SAMPLE1D_SHADOW_PROJ :
// Unimplemented
break ;
case FUNCTION : : TEXTURE_SAMPLE1D_DEPTH_RGBA :
return " TEX1D_Z24X8_RGBA8($_i, $0.x) " ;
case FUNCTION : : TEXTURE_SAMPLE1D_DEPTH_RGBA_PROJ :
return " TEX1D_Z24X8_RGBA8($_i, ($0.x / $0.w)) " ;
case FUNCTION : : TEXTURE_SAMPLE2D :
2020-12-19 12:28:10 +01:00
return " TEX2D($_i, $0.xy) " ;
2021-10-05 19:33:58 +02:00
case FUNCTION : : TEXTURE_SAMPLE2D_BIAS :
2020-12-19 12:28:10 +01:00
return " TEX2D_BIAS($_i, $0.xy, $1.x) " ;
2021-10-05 19:33:58 +02:00
case FUNCTION : : TEXTURE_SAMPLE2D_PROJ :
2020-12-19 12:28:10 +01:00
return " TEX2D_PROJ($_i, $0) " ;
2021-10-05 19:33:58 +02:00
case FUNCTION : : TEXTURE_SAMPLE2D_LOD :
2020-12-19 12:28:10 +01:00
return " TEX2D_LOD($_i, $0.xy, $1.x) " ;
2021-10-05 19:33:58 +02:00
case FUNCTION : : TEXTURE_SAMPLE2D_GRAD :
2020-12-19 12:28:10 +01:00
return " TEX2D_GRAD($_i, $0.xy, $1.xy, $2.xy) " ;
2021-10-05 19:33:58 +02:00
case FUNCTION : : TEXTURE_SAMPLE2D_SHADOW :
2020-12-19 12:28:10 +01:00
return " TEX2D_SHADOW($_i, $0.xyz) " ;
2021-10-05 19:33:58 +02:00
case FUNCTION : : TEXTURE_SAMPLE2D_SHADOW_PROJ :
2020-12-19 12:28:10 +01:00
return " TEX2D_SHADOWPROJ($_i, $0) " ;
2021-10-05 19:33:58 +02:00
case FUNCTION : : TEXTURE_SAMPLE2D_DEPTH_RGBA :
return " TEX2D_Z24X8_RGBA8($_i, $0.xy) " ;
case FUNCTION : : TEXTURE_SAMPLE2D_DEPTH_RGBA_PROJ :
return " TEX2D_Z24X8_RGBA8($_i, ($0.xy / $0.w)) " ;
case FUNCTION : : TEXTURE_SAMPLE3D :
2020-12-19 12:28:10 +01:00
return " TEX3D($_i, $0.xyz) " ;
2021-10-05 19:33:58 +02:00
case FUNCTION : : TEXTURE_SAMPLE3D_BIAS :
2020-12-19 12:28:10 +01:00
return " TEX3D_BIAS($_i, $0.xyz, $1.x) " ;
2021-10-05 19:33:58 +02:00
case FUNCTION : : TEXTURE_SAMPLE3D_PROJ :
2020-12-19 12:28:10 +01:00
return " TEX3D_PROJ($_i, $0) " ;
2021-10-05 19:33:58 +02:00
case FUNCTION : : TEXTURE_SAMPLE3D_LOD :
2020-12-19 12:28:10 +01:00
return " TEX3D_LOD($_i, $0.xyz, $1.x) " ;
2021-10-05 19:33:58 +02:00
case FUNCTION : : TEXTURE_SAMPLE3D_GRAD :
2020-12-19 12:28:10 +01:00
return " TEX3D_GRAD($_i, $0.xyz, $1.xyz, $2.xyz) " ;
2021-10-05 19:33:58 +02:00
case FUNCTION : : TEXTURE_SAMPLE3D_SHADOW :
return " TEX3D_SHADOW($_i, $0) " ;
case FUNCTION : : TEXTURE_SAMPLE3D_SHADOW_PROJ :
// Impossible
break ;
case FUNCTION : : TEXTURE_SAMPLE3D_DEPTH_RGBA :
return " TEX3D_Z24X8_RGBA8($_i, $0.xyz) " ;
case FUNCTION : : TEXTURE_SAMPLE3D_DEPTH_RGBA_PROJ :
return " TEX3D_Z24X8_RGBA8($_i, ($0.xyz / $0.w)) " ;
2022-03-13 09:32:04 +01:00
case FUNCTION : : TEXTURE_SAMPLE2DMS :
case FUNCTION : : TEXTURE_SAMPLE2DMS_BIAS :
return " TEX2D_MS($_i, $0.xy) " ;
case FUNCTION : : TEXTURE_SAMPLE2DMS_PROJ :
return " TEX2D_MS($_i, $0.xy / $0.w) " ;
case FUNCTION : : TEXTURE_SAMPLE2DMS_LOD :
case FUNCTION : : TEXTURE_SAMPLE2DMS_GRAD :
return " TEX2D_MS($_i, $0.xy) " ;
case FUNCTION : : TEXTURE_SAMPLE2DMS_SHADOW :
return " TEX2D_SHADOW_MS($_i, $0.xyz) " ;
case FUNCTION : : TEXTURE_SAMPLE2DMS_SHADOW_PROJ :
return " TEX2D_SHADOWPROJ_MS($_i, $0) " ;
case FUNCTION : : TEXTURE_SAMPLE2DMS_DEPTH_RGBA :
return " TEX2D_Z24X8_RGBA8_MS($_i, $0.xy) " ;
case FUNCTION : : TEXTURE_SAMPLE2DMS_DEPTH_RGBA_PROJ :
return " TEX2D_Z24X8_RGBA8_MS($_i, ($0.xy / $0.w)) " ;
2021-10-05 19:33:58 +02:00
case FUNCTION : : DFDX :
2020-12-19 12:28:10 +01:00
return " dFdx($0) " ;
2021-10-05 19:33:58 +02:00
case FUNCTION : : DFDY :
2020-12-19 12:28:10 +01:00
return " dFdy($0) " ;
2021-10-05 19:33:58 +02:00
case FUNCTION : : VERTEX_TEXTURE_FETCH1D :
2020-12-19 12:28:10 +01:00
return " textureLod($t, $0.x, 0) " ;
2021-10-05 19:33:58 +02:00
case FUNCTION : : VERTEX_TEXTURE_FETCH2D :
2020-12-19 12:28:10 +01:00
return " textureLod($t, $0.xy, 0) " ;
2021-10-05 19:33:58 +02:00
case FUNCTION : : VERTEX_TEXTURE_FETCH3D :
case FUNCTION : : VERTEX_TEXTURE_FETCHCUBE :
2020-12-19 12:28:10 +01:00
return " textureLod($t, $0.xyz, 0) " ;
2022-03-13 09:32:04 +01:00
case FUNCTION : : VERTEX_TEXTURE_FETCH2DMS :
return " texelFetch($t, ivec2($0.xy * textureSize($t)), 0) " ;
2020-12-19 12:28:10 +01:00
}
2021-10-05 19:33:58 +02:00
rsx_log . error ( " Unexpected function request: %d " , static_cast < int > ( f ) ) ;
return " $Ty(0.) " ;
2020-12-19 12:28:10 +01:00
}
/**
 * Appends the GLSL declaration of `struct sampler_info` to `OS`.
 *
 * The struct holds, in this order, a vec4 `scale_bias` followed by the uints
 * `remap` and `flags`. The member order is part of the contract: the
 * declaration must stay compatible with std140 packing rules.
 *
 * @param OS Stream receiving the generated shader source; it is only appended to.
 */
void insert_subheader_block(std::ostream& OS)
{
    // Global types and stuff
    // Must be compatible with std140 packing rules
    OS <<
        "struct sampler_info\n"
        "{\n"
        "vec4 scale_bias;\n"
        "uint remap;\n"
        "uint flags;\n"
        "};\n\n";
}
2022-12-05 20:28:43 +01:00
void insert_fragment_shader_inputs_block (
std : : stringstream & OS ,
2022-12-10 16:20:43 +01:00
const std : : string_view ext_flavour ,
2022-12-05 20:28:43 +01:00
const RSXFragmentProgram & prog ,
const std : : vector < ParamType > & params ,
const two_sided_lighting_config & _2sided_lighting ,
std : : function < int ( std : : string_view ) > varying_location )
{
struct _varying_register_config
{
int location ;
std : : string name ;
std : : string type ;
} ;
2022-12-10 16:20:43 +01:00
std : : vector < _varying_register_config > varying_list ;
2022-12-05 20:28:43 +01:00
for ( const ParamType & PT : params )
{
for ( const ParamItem & PI : PT . items )
{
// ssa is defined in the program body and is not a varying type
if ( PI . name = = " ssa " ) continue ;
const auto reg_location = varying_location ( PI . name ) ;
std : : string var_name = PI . name ;
if ( var_name = = " fogc " )
{
var_name = " fog_c " ;
}
else if ( prog . two_sided_lighting )
{
if ( var_name = = " diff_color " )
{
var_name = " diff_color0 " ;
}
else if ( var_name = = " spec_color " )
{
var_name = " spec_color0 " ;
}
}
varying_list . push_back ( { reg_location , var_name , PT . type } ) ;
}
}
if ( prog . two_sided_lighting )
{
if ( _2sided_lighting . two_sided_color )
{
varying_list . push_back ( { varying_location ( " diff_color1 " ) , " diff_color1 " , " vec4 " } ) ;
}
if ( _2sided_lighting . two_sided_specular )
{
varying_list . push_back ( { varying_location ( " spec_color1 " ) , " spec_color1 " , " vec4 " } ) ;
}
}
if ( varying_list . empty ( ) )
{
return ;
}
// Make the output a little nicer
2022-12-10 16:20:43 +01:00
std : : sort ( varying_list . begin ( ) , varying_list . end ( ) , FN ( x . location < y . location ) ) ;
2022-12-05 20:28:43 +01:00
if ( ! ( prog . ctrl & RSX_SHADER_CONTROL_ATTRIBUTE_INTERPOLATION ) )
{
for ( const auto & reg : varying_list )
{
OS < < " layout(location= " < < reg . location < < " ) in " < < reg . type < < " " < < reg . name < < " ; \n " ;
}
2022-12-10 16:20:43 +01:00
OS < < " \n " ;
2022-12-05 20:28:43 +01:00
return ;
}
for ( const auto & reg : varying_list )
{
2022-12-10 16:20:43 +01:00
OS < < " layout(location= " < < reg . location < < " ) pervertex " < < ext_flavour < < " in " < < reg . type < < " " < < reg . name < < " _raw[3]; \n " ;
2022-12-05 20:28:43 +01:00
}
// Interpolate the input attributes manually.
// Matches AMD behavior where gl_BaryCoordSmoothAMD only provides x and y with z being autogenerated.
2023-05-02 14:42:36 +02:00
std : : string interpolate_function_block =
" \n "
" vec4 _interpolate_varying3(const in vec4[3] v) \n "
" { \n "
2023-05-02 18:05:52 +02:00
// In the corner case where v[0] == v[1] == v[2], this algorithm generates a perfect result vs alternatives that use weighted multiply + add.
// Due to the finite precision of floating point arithmetic, adding together the result of different multiplies yeields a slightly inaccurate result which breaks things.
2023-05-02 14:42:36 +02:00
" const vec4 p10 = v[1] - v[0]; \n "
" const vec4 p20 = v[2] - v[0]; \n "
" return v[0] + p10 * $gl_BaryCoord.y + p20 * $gl_BaryCoord.z; \n "
" } \n \n " ;
2023-04-18 12:19:57 +02:00
OS < < fmt : : replace_all ( interpolate_function_block , { { " $gl_BaryCoord " , " gl_BaryCoord " s + std : : string ( ext_flavour ) } } ) ;
2022-12-05 20:28:43 +01:00
for ( const auto & reg : varying_list )
{
OS < < " vec4 " < < reg . name < < " = _interpolate_varying3( " < < reg . name < < " _raw); \n " ;
}
OS < < " \n " ;
}
2020-12-19 12:28:10 +01:00
}