rsx: Move heavy codegen capabilities to permutation flags from runtime checks

This commit is contained in:
kd-11 2025-12-14 07:26:30 +03:00 committed by kd-11
parent 3cb3f6972e
commit 20dcfa5c8a
10 changed files with 123 additions and 109 deletions

View file

@ -687,73 +687,6 @@ namespace rsx
rop_control.enable_alpha_test();
}
if (REGS(m_ctx)->polygon_stipple_enabled())
{
rop_control.enable_polygon_stipple();
}
auto can_use_hw_a2c = [&]() -> bool
{
const auto& config = RSX(m_ctx)->get_backend_config();
if (!config.supports_hw_a2c)
{
return false;
}
if (config.supports_hw_a2c_1spp)
{
return true;
}
return REGS(m_ctx)->surface_antialias() != rsx::surface_antialiasing::center_1_sample;
};
if (REGS(m_ctx)->msaa_alpha_to_coverage_enabled() && !can_use_hw_a2c())
{
// TODO: Properly support alpha-to-coverage and alpha-to-one behavior in shaders
// Alpha values generate a coverage mask for order independent blending
// Requires hardware AA to work properly (or just fragment sample stage in fragment shaders)
// Simulated using combined alpha blend and alpha test
rop_control.enable_alpha_to_coverage();
if (REGS(m_ctx)->msaa_sample_mask())
{
rop_control.enable_MSAA_writes();
}
// Sample configuration bits
switch (REGS(m_ctx)->surface_antialias())
{
case rsx::surface_antialiasing::center_1_sample:
break;
case rsx::surface_antialiasing::diagonal_centered_2_samples:
rop_control.set_msaa_control(1u);
break;
default:
rop_control.set_msaa_control(3u);
break;
}
}
// Check if framebuffer is actually an XRGB format and not a WZYX format
switch (REGS(m_ctx)->surface_color())
{
case rsx::surface_color_format::w16z16y16x16:
case rsx::surface_color_format::w32z32y32x32:
case rsx::surface_color_format::x32:
// These behave very differently from "normal" formats.
break;
default:
// Integer framebuffer formats.
rop_control.enable_framebuffer_INT();
// Check if we want sRGB conversion.
if (REGS(m_ctx)->framebuffer_srgb_enabled())
{
rop_control.enable_framebuffer_sRGB();
}
break;
}
// Generate wpos coefficients
// wpos equation is now as follows (ignoring pixel center offset):
// wpos.y = (frag_coord / resolution_scale) * ((window_origin!=top)?-1.: 1.) + ((window_origin!=top)? window_height : 0)
@ -766,7 +699,6 @@ namespace rsx
payload.rop_control = rop_control.value;
payload.alpha_ref = REGS(m_ctx)->alpha_ref();
const auto window_origin = REGS(m_ctx)->shader_window_origin();
const u32 window_height = REGS(m_ctx)->shader_window_height();
const auto pixel_center = REGS(m_ctx)->pixel_center();

View file

@ -219,16 +219,24 @@ void GLFragmentDecompilerThread::insertGlobalFunctions(std::stringstream &OS)
m_shader_props.require_srgb_to_linear = properties.has_upg;
m_shader_props.require_linear_to_srgb = properties.has_pkg;
m_shader_props.require_fog_read = properties.in_register_mask & in_fogc;
m_shader_props.emulate_coverage_tests = !rsx::get_renderer_backend_config().supports_hw_a2c_1spp;
m_shader_props.emulate_shadow_compare = device_props.emulate_depth_compare;
m_shader_props.low_precision_tests = ::gl::get_driver_caps().vendor_NVIDIA && !(m_prog.ctrl & RSX_SHADER_CONTROL_ATTRIBUTE_INTERPOLATION);
m_shader_props.disable_early_discard = !::gl::get_driver_caps().vendor_NVIDIA;
m_shader_props.supports_native_fp16 = device_props.has_native_half_support;
m_shader_props.ROP_output_rounding = g_cfg.video.shader_precision != gpu_preset_level::low;
m_shader_props.ROP_output_rounding = (g_cfg.video.shader_precision != gpu_preset_level::low) && !!(m_prog.ctrl & RSX_SHADER_CONTROL_8BIT_FRAMEBUFFER);
m_shader_props.ROP_sRGB_packing = !!(m_prog.ctrl & RSX_SHADER_CONTROL_SRGB_FRAMEBUFFER);
m_shader_props.ROP_alpha_test = !!(m_prog.ctrl & RSX_SHADER_CONTROL_ALPHA_TEST);
m_shader_props.ROP_alpha_to_coverage_test = !!(m_prog.ctrl & RSX_SHADER_CONTROL_ALPHA_TO_COVERAGE);
m_shader_props.ROP_polygon_stipple_test = !!(m_prog.ctrl & RSX_SHADER_CONTROL_POLYGON_STIPPLE);
m_shader_props.ROP_discard = !!(m_prog.ctrl & RSX_SHADER_CONTROL_USES_KIL);
m_shader_props.require_tex1D_ops = properties.has_tex1D;
m_shader_props.require_tex2D_ops = properties.has_tex2D;
m_shader_props.require_tex3D_ops = properties.has_tex3D;
m_shader_props.require_shadowProj_ops = properties.shadow_sampler_mask != 0 && properties.has_texShadowProj;
m_shader_props.require_alpha_kill = !!(m_prog.ctrl & RSX_SHADER_CONTROL_TEXTURE_ALPHA_KILL);
glsl::insert_glsl_legacy_function(OS, m_shader_props);
}

View file

@ -216,12 +216,12 @@ namespace glsl
enabled_options.push_back("_32_BIT_OUTPUT");
}
if (!props.fp32_outputs)
if (props.ROP_sRGB_packing)
{
enabled_options.push_back("_ENABLE_FRAMEBUFFER_SRGB");
}
if (props.disable_early_discard)
if (props.disable_early_discard && props.ROP_discard)
{
enabled_options.push_back("_DISABLE_EARLY_DISCARD");
}
@ -231,7 +231,15 @@ namespace glsl
enabled_options.push_back("_ENABLE_ROP_OUTPUT_ROUNDING");
}
enabled_options.push_back("_ENABLE_POLYGON_STIPPLE");
if (props.ROP_alpha_test)
{
enabled_options.push_back("_ENABLE_ALPHA_TEST");
}
if (props.ROP_polygon_stipple_test)
{
enabled_options.push_back("_ENABLE_POLYGON_STIPPLE");
}
}
// Import common header
@ -276,12 +284,12 @@ namespace glsl
return;
}
if (props.emulate_coverage_tests)
if (props.ROP_alpha_to_coverage_test)
{
enabled_options.push_back("_EMULATE_COVERAGE_TEST");
enabled_options.push_back("_ENABLE_ALPHA_TO_COVERAGE_TEST");
}
if (!props.fp32_outputs || props.require_linear_to_srgb)
if (props.ROP_sRGB_packing || props.require_linear_to_srgb)
{
enabled_options.push_back("_ENABLE_LINEAR_TO_SRGB");
}
@ -296,6 +304,11 @@ namespace glsl
enabled_options.push_back("_ENABLE_WPOS");
}
if (props.ROP_alpha_test || (props.require_msaa_ops && props.require_tex_shadow_ops))
{
enabled_options.push_back("_ENABLE_COMPARISON_FUNC");
}
if (props.require_fog_read)
{
program_common::define_glsl_constants<rsx::fog_mode>(OS,
@ -385,6 +398,11 @@ namespace glsl
enabled_options.push_back("_ENABLE_SHADOWPROJ");
}
if (props.require_alpha_kill)
{
enabled_options.push_back("_ENABLE_TEXTURE_ALPHA_KILL");
}
program_common::define_glsl_switches(OS, enabled_options);
enabled_options.clear();

View file

@ -81,7 +81,7 @@ vec4 fetch_fog_value(const in uint mode)
}
#endif
#ifdef _EMULATE_COVERAGE_TEST
#ifdef _ENABLE_ALPHA_TO_COVERAGE_TEST
// Purely stochastic
bool coverage_test_passes(const in vec4 _sample)
{
@ -109,6 +109,7 @@ vec4 srgb_to_linear(const in vec4 cs)
}
#endif
#ifdef _ENABLE_COMPARISON_FUNC
// Required by all fragment shaders for alpha test
bool comparison_passes(const in float a, const in float b, const in uint func)
{
@ -125,5 +126,6 @@ bool comparison_passes(const in float a, const in float b, const in uint func)
case 7: return true; //always
}
}
#endif
)"

View file

@ -188,6 +188,7 @@ vec4 _process_texel(in vec4 rgba, const in uint control_bits)
return rgba;
}
#ifdef _ENABLE_TEXTURE_ALPHA_KILL
if (_test_bit(control_bits, ALPHAKILL))
{
// Alphakill
@ -197,6 +198,7 @@ vec4 _process_texel(in vec4 rgba, const in uint control_bits)
return rgba;
}
}
#endif
if (_test_bit(control_bits, RENORMALIZE))
{

View file

@ -8,43 +8,33 @@ R"(
#endif
#ifdef _ENABLE_FRAMEBUFFER_SRGB
if (_test_bit(rop_control, SRGB_FRAMEBUFFER_BIT))
{
col0.rgb = _mrt_color_t(linear_to_srgb(col0)).rgb;
col1.rgb = _mrt_color_t(linear_to_srgb(col1)).rgb;
col2.rgb = _mrt_color_t(linear_to_srgb(col2)).rgb;
col3.rgb = _mrt_color_t(linear_to_srgb(col3)).rgb;
}
col0.rgb = _mrt_color_t(linear_to_srgb(col0)).rgb;
col1.rgb = _mrt_color_t(linear_to_srgb(col1)).rgb;
col2.rgb = _mrt_color_t(linear_to_srgb(col2)).rgb;
col3.rgb = _mrt_color_t(linear_to_srgb(col3)).rgb;
#endif
#ifdef _ENABLE_ROP_OUTPUT_ROUNDING
if (_test_bit(rop_control, INT_FRAMEBUFFER_BIT))
{
col0 = round_to_8bit(col0);
col1 = round_to_8bit(col1);
col2 = round_to_8bit(col2);
col3 = round_to_8bit(col3);
}
col0 = round_to_8bit(col0);
col1 = round_to_8bit(col1);
col2 = round_to_8bit(col2);
col3 = round_to_8bit(col3);
#endif
// Post-output stages
// Alpha Testing
if (_test_bit(rop_control, ALPHA_TEST_ENABLE_BIT))
#ifdef _ENABLE_ALPHA_TEST
const uint alpha_func = _get_bits(rop_control, ALPHA_TEST_FUNC_OFFSET, ALPHA_TEST_FUNC_LENGTH);
if (!comparison_passes(col0.a, alpha_ref, alpha_func))
{
const uint alpha_func = _get_bits(rop_control, ALPHA_TEST_FUNC_OFFSET, ALPHA_TEST_FUNC_LENGTH);
if (!comparison_passes(col0.a, alpha_ref, alpha_func))
{
discard;
}
discard;
}
#endif
#ifdef _EMULATE_COVERAGE_TEST
if (_test_bit(rop_control, ALPHA_TO_COVERAGE_ENABLE_BIT))
#ifdef _ENABLE_ALPHA_TO_COVERAGE_TEST
if (!_test_bit(rop_control, MSAA_WRITE_ENABLE_BIT) || !coverage_test_passes(col0))
{
if (!_test_bit(rop_control, MSAA_WRITE_ENABLE_BIT) || !coverage_test_passes(col0))
{
discard;
}
discard;
}
#endif

View file

@ -36,12 +36,18 @@ namespace glsl
bool require_srgb_to_linear : 1;
bool require_linear_to_srgb : 1;
bool require_fog_read : 1;
bool emulate_coverage_tests : 1;
bool emulate_shadow_compare : 1;
bool low_precision_tests : 1;
bool disable_early_discard : 1;
bool supports_native_fp16 : 1;
// ROP control flags
bool ROP_output_rounding : 1;
bool ROP_sRGB_packing : 1;
bool ROP_alpha_test : 1;
bool ROP_alpha_to_coverage_test : 1;
bool ROP_polygon_stipple_test : 1;
bool ROP_discard : 1;
// Texturing spec
bool require_texture_ops : 1; // Global switch to enable/disable all texture code
@ -53,5 +59,6 @@ namespace glsl
bool require_tex2D_ops : 1; // Include 2D texture stuff
bool require_tex3D_ops : 1; // Include 3D texture stuff (including cubemap)
bool require_shadowProj_ops : 1; // Include shadow2DProj projection textures (1D is unsupported anyway)
bool require_alpha_kill : 1; // Include alpha kill checking code
};
};

View file

@ -2063,6 +2063,26 @@ namespace rsx
{
current_fragment_program.ctrl |= RSX_SHADER_CONTROL_ATTRIBUTE_INTERPOLATION;
}
if (method_registers.alpha_test_enabled())
{
current_fragment_program.ctrl |= RSX_SHADER_CONTROL_ALPHA_TEST;
}
if (method_registers.polygon_stipple_enabled())
{
current_fragment_program.ctrl |= RSX_SHADER_CONTROL_POLYGON_STIPPLE;
}
if (method_registers.msaa_alpha_to_coverage_enabled())
{
const bool is_multiple_samples = method_registers.surface_antialias() != rsx::surface_antialiasing::center_1_sample;
if (!backend_config.supports_hw_a2c || (!is_multiple_samples && !backend_config.supports_hw_a2c_1spp))
{
// Emulation required
current_fragment_program.ctrl |= RSX_SHADER_CONTROL_ALPHA_TO_COVERAGE;
}
}
}
else if (method_registers.point_sprite_enabled() &&
method_registers.current_draw_clause.primitive == primitive_type::points)
@ -2071,6 +2091,24 @@ namespace rsx
current_fragment_program.texcoord_control_mask |= u32(method_registers.point_sprite_control_mask()) << 16;
}
// Check if framebuffer is actually an XRGB format and not a WZYX format
switch (method_registers.surface_color())
{
case rsx::surface_color_format::w16z16y16x16:
case rsx::surface_color_format::w32z32y32x32:
case rsx::surface_color_format::x32:
// These behave very differently from "normal" formats.
break;
default:
// Integer framebuffer formats. These can support sRGB output as well as some special rules for output quantization.
current_fragment_program.ctrl |= RSX_SHADER_CONTROL_8BIT_FRAMEBUFFER;
if (method_registers.framebuffer_srgb_enabled())
{
current_fragment_program.ctrl |= RSX_SHADER_CONTROL_SRGB_FRAMEBUFFER;
}
break;
}
for (u32 textures_ref = current_fp_metadata.referenced_textures_mask, i = 0; textures_ref; textures_ref >>= 1, ++i)
{
if (!(textures_ref & 1)) continue;
@ -2098,6 +2136,7 @@ namespace rsx
{
//alphakill can be ignored unless a valid comparison function is set
texture_control |= (1 << texture_control_bits::ALPHAKILL);
current_fragment_program.ctrl |= RSX_SHADER_CONTROL_TEXTURE_ALPHA_KILL;
}
//const u32 texaddr = rsx::get_address(tex.offset(), tex.location());

View file

@ -318,16 +318,24 @@ void VKFragmentDecompilerThread::insertGlobalFunctions(std::stringstream &OS)
m_shader_props.require_srgb_to_linear = properties.has_upg;
m_shader_props.require_linear_to_srgb = properties.has_pkg;
m_shader_props.require_fog_read = properties.in_register_mask & in_fogc;
m_shader_props.emulate_coverage_tests = g_cfg.video.antialiasing_level == msaa_level::none;
m_shader_props.emulate_shadow_compare = device_props.emulate_depth_compare;
m_shader_props.low_precision_tests = device_props.has_low_precision_rounding && !(m_prog.ctrl & RSX_SHADER_CONTROL_ATTRIBUTE_INTERPOLATION);
m_shader_props.disable_early_discard = !vk::is_NVIDIA(vk::get_driver_vendor());
m_shader_props.supports_native_fp16 = device_props.has_native_half_support;
m_shader_props.ROP_output_rounding = g_cfg.video.shader_precision != gpu_preset_level::low;
m_shader_props.ROP_output_rounding = (g_cfg.video.shader_precision != gpu_preset_level::low) && !!(m_prog.ctrl & RSX_SHADER_CONTROL_8BIT_FRAMEBUFFER);
m_shader_props.ROP_sRGB_packing = !!(m_prog.ctrl & RSX_SHADER_CONTROL_SRGB_FRAMEBUFFER);
m_shader_props.ROP_alpha_test = !!(m_prog.ctrl & RSX_SHADER_CONTROL_ALPHA_TEST);
m_shader_props.ROP_alpha_to_coverage_test = !!(m_prog.ctrl & RSX_SHADER_CONTROL_ALPHA_TO_COVERAGE);
m_shader_props.ROP_polygon_stipple_test = !!(m_prog.ctrl & RSX_SHADER_CONTROL_POLYGON_STIPPLE);
m_shader_props.ROP_discard = !!(m_prog.ctrl & RSX_SHADER_CONTROL_USES_KIL);
m_shader_props.require_tex1D_ops = properties.has_tex1D;
m_shader_props.require_tex2D_ops = properties.has_tex2D;
m_shader_props.require_tex3D_ops = properties.has_tex3D;
m_shader_props.require_shadowProj_ops = properties.shadow_sampler_mask != 0 && properties.has_texShadowProj;
m_shader_props.require_alpha_kill = !!(m_prog.ctrl & RSX_SHADER_CONTROL_TEXTURE_ALPHA_KILL);
// Declare global constants
if (m_shader_props.require_fog_read)

View file

@ -454,9 +454,17 @@ namespace gcm
RSX_SHADER_CONTROL_UNKNOWN1 = 0x8000, // seemingly set when srgb packer is used??
// Custom
RSX_SHADER_CONTROL_ATTRIBUTE_INTERPOLATION = 0x10000, // Rasterizing triangles and not lines or points
RSX_SHADER_CONTROL_INSTANCED_CONSTANTS = 0x20000, // Support instance ID offsets when loading constants
RSX_SHADER_CONTROL_INTERPRETER_MODEL = 0x40000, // Compile internals expecting interpreter
RSX_SHADER_CONTROL_ATTRIBUTE_INTERPOLATION = 0x0010000, // Rasterizing triangles and not lines or points
RSX_SHADER_CONTROL_INSTANCED_CONSTANTS = 0x0020000, // Support instance ID offsets when loading constants
RSX_SHADER_CONTROL_INTERPRETER_MODEL = 0x0040000, // Compile internals expecting interpreter
RSX_SHADER_CONTROL_8BIT_FRAMEBUFFER = 0x0080000, // Quantize outputs to 8-bit FBO
RSX_SHADER_CONTROL_SRGB_FRAMEBUFFER = 0x0100000, // Outputs are SRGB. We could reuse UNKNOWN1 but we just keep the namespaces separate.
RSX_SHADER_CONTROL_TEXTURE_ALPHA_KILL = 0x0200000, // Uses alpha kill on texture input
RSX_SHADER_CONTROL_ALPHA_TEST = 0x0400000, // Uses alpha test on the outputs
RSX_SHADER_CONTROL_POLYGON_STIPPLE = 0x0800000, // Uses polygon stipple for dithered rendering
RSX_SHADER_CONTROL_ALPHA_TO_COVERAGE = 0x1000000, // Alpha to coverage
};
// GCM Reports