rsx/vk: Convert fragment constants from static struct to array of vec4

- Also allows Vulkan to use the full-range binding model
This commit is contained in:
kd-11 2025-07-27 02:36:58 +03:00 committed by kd-11
parent 3188cb4a5a
commit e0e0d1ee0b
10 changed files with 91 additions and 132 deletions

View file

@ -158,50 +158,38 @@ void GLFragmentDecompilerThread::insertConstants(std::stringstream & OS)
OS << "\n";
std::string constants_block;
for (const ParamType& PT : m_parr.params[PF_PARAM_UNIFORM])
if (!properties.constant_offsets.empty())
{
if (PT.type == "sampler1D" ||
PT.type == "sampler2D" ||
PT.type == "sampler3D" ||
PT.type == "samplerCube")
continue;
for (const ParamItem& PI : PT.items)
{
constants_block += " " + PT.type + " " + PI.name + ";\n";
}
OS <<
"layout(std140, binding = " << GL_FRAGMENT_CONSTANT_BUFFERS_BIND_SLOT << ") uniform FragmentConstantsBuffer\n"
"{\n"
" vec4 fc[" << properties.constant_offsets.size() << "];\n"
"};\n"
"#define _fetch_constant(x) fc[x]\n\n";
}
if (!constants_block.empty())
{
OS << "layout(std140, binding = " << GL_FRAGMENT_CONSTANT_BUFFERS_BIND_SLOT << ") uniform FragmentConstantsBuffer\n";
OS << "{\n";
OS << constants_block;
OS << "};\n\n";
}
OS <<
"layout(std140, binding = " << GL_FRAGMENT_STATE_BIND_SLOT << ") uniform FragmentStateBuffer\n"
"{\n"
" float fog_param0;\n"
" float fog_param1;\n"
" uint rop_control;\n"
" float alpha_ref;\n"
" uint reserved;\n"
" uint fog_mode;\n"
" float wpos_scale;\n"
" float wpos_bias;\n"
"};\n\n"
OS << "layout(std140, binding = " << GL_FRAGMENT_STATE_BIND_SLOT << ") uniform FragmentStateBuffer\n";
OS << "{\n";
OS << " float fog_param0;\n";
OS << " float fog_param1;\n";
OS << " uint rop_control;\n";
OS << " float alpha_ref;\n";
OS << " uint reserved;\n";
OS << " uint fog_mode;\n";
OS << " float wpos_scale;\n";
OS << " float wpos_bias;\n";
OS << "};\n\n";
"layout(std140, binding = " << GL_FRAGMENT_TEXTURE_PARAMS_BIND_SLOT << ") uniform TextureParametersBuffer\n"
"{\n"
" sampler_info texture_parameters[16];\n"
"};\n\n"
OS << "layout(std140, binding = " << GL_FRAGMENT_TEXTURE_PARAMS_BIND_SLOT << ") uniform TextureParametersBuffer\n";
OS << "{\n";
OS << " sampler_info texture_parameters[16];\n";
OS << "};\n\n";
OS << "layout(std140, binding = " << GL_RASTERIZER_STATE_BIND_SLOT << ") uniform RasterizerHeap\n";
OS << "{\n";
OS << " uvec4 stipple_pattern[8];\n";
OS << "};\n\n";
// Tail of the single chained `OS <<` expression: the literals below must NOT be terminated
// individually, otherwise only the block header is streamed and the body lines become
// no-op expression statements, leaving the generated GLSL without the RasterizerHeap body.
"layout(std140, binding = " << GL_RASTERIZER_STATE_BIND_SLOT << ") uniform RasterizerHeap\n"
"{\n"
" uvec4 stipple_pattern[8];\n"
"};\n\n";
}
void GLFragmentDecompilerThread::insertGlobalFunctions(std::stringstream &OS)
@ -373,21 +361,7 @@ void GLFragmentProgram::Decompile(const RSXFragmentProgram& prog)
decompiler.Task();
for (const ParamType& PT : decompiler.m_parr.params[PF_PARAM_UNIFORM])
{
for (const ParamItem& PI : PT.items)
{
if (PT.type == "sampler1D" ||
PT.type == "sampler2D" ||
PT.type == "sampler3D" ||
PT.type == "samplerCube")
continue;
usz offset = atoi(PI.name.c_str() + 2);
FragmentConstantOffsetCache.push_back(offset);
}
}
constant_offsets = std::move(decompiler.properties.constant_offsets);
shader.create(::glsl::program_domain::glsl_fragment_program, source);
id = shader.id();
}

View file

@ -58,7 +58,7 @@ public:
ParamArray parr;
u32 id;
gl::glsl::shader shader;
std::vector<usz> FragmentConstantOffsetCache;
std::vector<u32> constant_offsets;
/**
* Decompile a fragment shader located in the PS3's Memory. This function operates synchronously.

View file

@ -232,23 +232,13 @@ std::string FragmentProgramDecompiler::AddCond()
std::string FragmentProgramDecompiler::AddConst()
{
const std::string name = std::string("fc") + std::to_string(m_size + 4 * 4);
const std::string type = getFloatTypeName(4);
if (m_parr.HasParam(PF_PARAM_UNIFORM, type, name))
{
return name;
}
auto data = reinterpret_cast<be_t<u32>*>(reinterpret_cast<uptr>(m_prog.get_data()) + m_size + 4 * sizeof(u32));
// Skip next instruction, its just a literal
m_offset = 2 * 4 * sizeof(u32);
u32 x = GetData(data[0]);
u32 y = GetData(data[1]);
u32 z = GetData(data[2]);
u32 w = GetData(data[3]);
const auto var = fmt::format("%s(%f, %f, %f, %f)", type, std::bit_cast<f32>(x), std::bit_cast<f32>(y), std::bit_cast<f32>(z), std::bit_cast<f32>(w));
return m_parr.AddParam(PF_PARAM_UNIFORM, type, name, var);
// Return the next offset index
const u32 index = ::size32(properties.constant_offsets);
properties.constant_offsets.push_back(m_size + 4 * 4);
return "_fetch_constant(" + std::to_string(index) + ")";
}
std::string FragmentProgramDecompiler::AddTex()
@ -847,6 +837,12 @@ std::string FragmentProgramDecompiler::BuildCode()
}
}
// Register all inlined literals with the parameter table as a single PF_PARAM_CONST entry
// named "fc[N]", where N is the number of literal offsets collected during decompilation.
// Nothing is registered when the program uses no literals.
if (!properties.constant_offsets.empty())
{
const std::string var_name = fmt::format("fc[%llu]", properties.constant_offsets.size());
m_parr.AddParam(PF_PARAM_CONST, getFloatTypeName(4), var_name);
}
std::stringstream OS;
if (!m_is_valid_ucode)

View file

@ -189,6 +189,9 @@ public:
bool has_tex2D = false;
bool has_tex3D = false;
bool has_texShadowProj = false;
// Literal offsets: one entry per inlined literal, stored in _fetch_constant() index order.
// Each value is a byte offset into the fragment program data where that literal's 4 words start.
std::vector<u32> constant_offsets;
}
properties;

View file

@ -782,10 +782,10 @@ bool fragment_program_compare::compare_properties(const RSXFragmentProgram& bina
namespace rsx
{
#if defined(ARCH_X64) || defined(ARCH_ARM64)
static inline void write_fragment_constants_to_buffer_sse2(const std::span<f32>& buffer, const RSXFragmentProgram& rsx_prog, const std::vector<usz>& offsets_cache, bool sanitize)
static inline void write_fragment_constants_to_buffer_sse2(const std::span<f32>& buffer, const RSXFragmentProgram& rsx_prog, const std::vector<u32>& offsets_cache, bool sanitize)
{
f32* dst = buffer.data();
for (usz offset_in_fragment_program : offsets_cache)
for (u32 offset_in_fragment_program : offsets_cache)
{
const char* data = static_cast<const char*>(rsx_prog.get_data()) + offset_in_fragment_program;
@ -809,7 +809,7 @@ namespace rsx
}
}
#else
static inline void write_fragment_constants_to_buffer_fallback(const std::span<f32>& buffer, const RSXFragmentProgram& rsx_prog, const std::vector<usz>& offsets_cache, bool sanitize)
static inline void write_fragment_constants_to_buffer_fallback(const std::span<f32>& buffer, const RSXFragmentProgram& rsx_prog, const std::vector<u32>& offsets_cache, bool sanitize)
{
f32* dst = buffer.data();
@ -837,7 +837,7 @@ namespace rsx
}
#endif
void write_fragment_constants_to_buffer(const std::span<f32>& buffer, const RSXFragmentProgram& rsx_prog, const std::vector<usz>& offsets_cache, bool sanitize)
void write_fragment_constants_to_buffer(const std::span<f32>& buffer, const RSXFragmentProgram& rsx_prog, const std::vector<u32>& offsets_cache, bool sanitize)
{
#if defined(ARCH_X64) || defined(ARCH_ARM64)
write_fragment_constants_to_buffer_sse2(buffer, rsx_prog, offsets_cache, sanitize);

View file

@ -137,7 +137,7 @@ namespace rsx
RSXVertexProgram m_cached_vp_properties;
};
void write_fragment_constants_to_buffer(const std::span<f32>& buffer, const RSXFragmentProgram& rsx_prog, const std::vector<usz>& offsets_cache, bool sanitize = true);
void write_fragment_constants_to_buffer(const std::span<f32>& buffer, const RSXFragmentProgram& rsx_prog, const std::vector<u32>& offsets_cache, bool sanitize = true);
}
@ -447,14 +447,14 @@ public:
void fill_fragment_constants_buffer(std::span<f32> dst_buffer, const fragment_program_type& fragment_program, const RSXFragmentProgram& rsx_prog, bool sanitize = false) const
{
if (dst_buffer.size_bytes() < (fragment_program.FragmentConstantOffsetCache.size() * 16))
if (dst_buffer.size_bytes() < (fragment_program.constant_offsets.size() * 16))
{
// This can happen if CELL alters the shader after it has been loaded by RSX.
rsx_log.error("Insufficient constants buffer size passed to fragment program! Corrupt shader?");
return;
}
rsx::write_fragment_constants_to_buffer(dst_buffer, rsx_prog, fragment_program.FragmentConstantOffsetCache, sanitize);
rsx::write_fragment_constants_to_buffer(dst_buffer, rsx_prog, fragment_program.constant_offsets, sanitize);
}
void clear()

View file

@ -29,27 +29,19 @@ std::string VKFragmentDecompilerThread::compareFunction(COMPARE f, const std::st
void VKFragmentDecompilerThread::prepareBindingTable()
{
// First check if we have constants and textures as those need extra work
bool has_constants = false, has_textures = false;
bool has_textures = false;
for (const ParamType& PT : m_parr.params[PF_PARAM_UNIFORM])
{
if (has_constants && has_textures)
{
break;
}
if (PT.type.starts_with("sampler"))
{
has_textures = true;
continue;
break;
}
ensure(PT.type.starts_with("vec"));
has_constants = true;
}
unsigned location = 0; // All bindings must be set from this var
vk_prog->binding_table.context_buffer_location = location++;
if (has_constants)
if (!properties.constant_offsets.empty())
{
vk_prog->binding_table.cbuf_location = location++;
}
@ -233,26 +225,13 @@ void VKFragmentDecompilerThread::insertConstants(std::stringstream & OS)
}
}
std::string constants_block;
for (const ParamType& PT : m_parr.params[PF_PARAM_UNIFORM])
if (!properties.constant_offsets.empty())
{
if (PT.type.starts_with("sampler"))
{
continue;
}
for (const ParamItem& PI : PT.items)
{
constants_block += " " + PT.type + " " + PI.name + ";\n";
}
}
if (!constants_block.empty())
{
OS << "layout(std140, set=1, binding=" << vk_prog->binding_table.cbuf_location << ") uniform FragmentConstantsBuffer\n";
OS << "layout(std140, set=1, binding=" << vk_prog->binding_table.cbuf_location << ") readonly buffer FragmentConstantsBuffer\n";
OS << "{\n";
OS << constants_block;
OS << "};\n\n";
OS << " vec4 fc[];\n";
OS << "};\n";
OS << "#define _fetch_constant(x) fc[x + fs_constants_offset]\n\n";
}
OS << "layout(std140, set=1, binding=" << vk_prog->binding_table.context_buffer_location << ") uniform FragmentStateBuffer\n";
@ -280,19 +259,20 @@ void VKFragmentDecompilerThread::insertConstants(std::stringstream & OS)
vk::glsl::program_input in
{
.domain = glsl::glsl_fragment_program,
.type = vk::glsl::input_type_uniform_buffer,
.set = vk::glsl::binding_set_index_fragment
};
if (!constants_block.empty())
if (!properties.constant_offsets.empty())
{
// The literal constants block is declared as a `readonly buffer`, so this entry is
// registered as a storage buffer rather than a uniform buffer.
in.location = vk_prog->binding_table.cbuf_location;
in.name = "FragmentConstantsBuffer";
// Terminated with ';' (was ','), so the assignment no longer chains into push_back via the comma operator.
in.type = vk::glsl::input_type_storage_buffer;
inputs.push_back(in);
}
in.location = vk_prog->binding_table.context_buffer_location;
in.name = "FragmentStateBuffer";
in.type = vk::glsl::input_type_uniform_buffer;
inputs.push_back(in);
in.location = vk_prog->binding_table.tex_param_location;
@ -302,6 +282,23 @@ void VKFragmentDecompilerThread::insertConstants(std::stringstream & OS)
in.location = vk_prog->binding_table.polygon_stipple_params_location;
in.name = "RasterizerHeap";
inputs.push_back(in);
// Fragment-stage view of the push constants. Only the base index added by _fetch_constant()
// is read here. The member carries an explicit offset so that it addresses the same bytes as
// the range registered below (offset 12, size 4); without the qualifier the first member of a
// push_constant block sits at byte 0.
OS <<
"layout(push_constant) uniform push_constants_block\n"
"{\n"
" layout(offset=12) uint fs_constants_offset;\n"
"};\n\n";
// NOTE(review): the registered name ("fs_push_constants_block") differs from the GLSL block
// name ("push_constants_block"), and the set index is the vertex one although the domain is
// the fragment program - presumably intentional because the block is shared; confirm.
const vk::glsl::program_input push_constants
{
.domain = glsl::glsl_fragment_program,
.type = vk::glsl::input_type_push_constant,
.bound_data = vk::glsl::push_constant_ref{.offset = 12, .size = 4 },
.set = vk::glsl::binding_set_index_vertex,
.location = umax,
.name = "fs_push_constants_block"
};
inputs.push_back(push_constants);
}
void VKFragmentDecompilerThread::insertGlobalFunctions(std::stringstream &OS)
@ -478,19 +475,8 @@ void VKFragmentProgram::Decompile(const RSXFragmentProgram& prog)
decompiler.device_props.has_low_precision_rounding = vk::is_NVIDIA(vk::get_driver_vendor());
decompiler.Task();
constant_offsets = std::move(decompiler.properties.constant_offsets);
shader.create(::glsl::program_domain::glsl_fragment_program, source);
for (const ParamType& PT : decompiler.m_parr.params[PF_PARAM_UNIFORM])
{
for (const ParamItem& PI : PT.items)
{
if (PT.type.starts_with("sampler"))
continue;
usz offset = atoi(PI.name.c_str() + 2);
FragmentConstantOffsetCache.push_back(offset);
}
}
}
void VKFragmentProgram::Compile()

View file

@ -62,7 +62,7 @@ public:
VkShaderModule handle = nullptr;
u32 id;
vk::glsl::shader shader;
std::vector<usz> FragmentConstantOffsetCache;
std::vector<u32> constant_offsets;
std::array<u32, 4> output_color_masks{ {} };
std::vector<vk::glsl::program_input> uniforms;

View file

@ -514,7 +514,7 @@ VKGSRender::VKGSRender(utils::serial* ar) noexcept : GSRender(ar)
m_vertex_env_ring_info.create(VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, "vertex env buffer");
m_fragment_texture_params_ring_info.create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, "fragment texture params buffer");
m_vertex_layout_ring_info.create(VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, "vertex layout buffer", 0x10000, VK_TRUE);
m_fragment_constants_ring_info.create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, "fragment constants buffer");
m_fragment_constants_ring_info.create(VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, "fragment constants buffer");
m_transform_constants_ring_info.create(VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VK_TRANSFORM_CONSTANTS_BUFFER_SIZE_M * 0x100000, "transform constants buffer");
m_index_buffer_ring_info.create(VK_BUFFER_USAGE_INDEX_BUFFER_BIT, VK_INDEX_RING_BUFFER_SIZE_M * 0x100000, "index buffer");
m_texture_upload_buffer_ring_info.create(VK_BUFFER_USAGE_TRANSFER_SRC_BIT, VK_TEXTURE_UPLOAD_RING_BUFFER_SIZE_M * 0x100000, "texture upload buffer", 32 * 0x100000);
@ -552,11 +552,12 @@ VKGSRender::VKGSRender(utils::serial* ar) noexcept : GSRender(ar)
// Initialize optional allocation information with placeholders
m_vertex_env_buffer_info = { m_vertex_env_ring_info.heap->value, 0, VK_WHOLE_SIZE };
m_vertex_constants_buffer_info = { m_transform_constants_ring_info.heap->value, 0, 16 };
m_vertex_constants_buffer_info = { m_transform_constants_ring_info.heap->value, 0, VK_WHOLE_SIZE };
m_fragment_env_buffer_info = { m_fragment_env_ring_info.heap->value, 0, 16 };
m_fragment_texture_params_buffer_info = { m_fragment_texture_params_ring_info.heap->value, 0, 16 };
m_raster_env_buffer_info = { m_raster_env_ring_info.heap->value, 0, 128 };
m_vertex_layout_stream_info = { m_vertex_layout_ring_info.heap->value, 0, VK_WHOLE_SIZE };
m_fragment_constants_buffer_info = { m_fragment_constants_ring_info.heap->value, 0, VK_WHOLE_SIZE };
const auto& limits = m_device->gpu().get_limits();
m_texbuffer_view_size = std::min(limits.maxTexelBufferElements, VK_ATTRIB_RING_BUFFER_SIZE_M * 0x100000u);
@ -2007,18 +2008,13 @@ void VKGSRender::load_program_env()
// Fragment constants
if (fragment_constants_size)
{
auto mem = m_fragment_constants_ring_info.alloc<256>(fragment_constants_size);
auto buf = m_fragment_constants_ring_info.map(mem, fragment_constants_size);
m_fragment_constants_dynamic_offset = m_fragment_constants_ring_info.alloc<16>(fragment_constants_size);
auto buf = m_fragment_constants_ring_info.map(m_fragment_constants_dynamic_offset, fragment_constants_size);
m_prog_buffer->fill_fragment_constants_buffer({ reinterpret_cast<float*>(buf), fragment_constants_size },
*ensure(m_fragment_prog), current_fragment_program, true);
m_fragment_constants_ring_info.unmap();
m_fragment_constants_buffer_info = { m_fragment_constants_ring_info.heap->value, mem, fragment_constants_size };
}
else
{
m_fragment_constants_buffer_info = { m_fragment_constants_ring_info.heap->value, 0, 32 };
}
}
@ -2194,6 +2190,7 @@ void VKGSRender::update_vertex_env(u32 id, const vk::vertex_upload_info& vertex_
u32 xform_constants_offset;
u32 vs_context_offset;
u32 vs_attrib_layout_offset;
u32 fs_constants_offset;
};
struct rsx_prog_vertex_layout_entry_t
@ -2206,15 +2203,17 @@ void VKGSRender::update_vertex_env(u32 id, const vk::vertex_upload_info& vertex_
};
// Actual allocation must have been done previously
const u32 constant_id_offset = static_cast<u32>(m_xform_constants_dynamic_offset) / 16u;
const u32 vs_constant_id_offset = static_cast<u32>(m_xform_constants_dynamic_offset) / 16u;
const u32 vertex_context_offset = static_cast<u32>(m_vertex_env_dynamic_offset) / 128u;
const u32 vertex_layout_offset = static_cast<u32>(m_vertex_layout_dynamic_offset) / 144u;
const u32 fs_constant_id_offset = static_cast<u32>(m_fragment_constants_dynamic_offset) / 16u;
// Pack
rsx_prog_push_constants_block_t push_constants;
push_constants.xform_constants_offset = constant_id_offset;
push_constants.xform_constants_offset = vs_constant_id_offset;
push_constants.vs_context_offset = vertex_context_offset;
push_constants.vs_attrib_layout_offset = vertex_layout_offset + id;
push_constants.fs_constants_offset = fs_constant_id_offset;
vkCmdPushConstants(
*m_current_command_buffer,

View file

@ -153,6 +153,7 @@ private:
u64 m_xform_constants_dynamic_offset = 0; // We manage transform_constants dynamic offset manually to alleviate performance penalty of doing a hot-patch of constants.
u64 m_vertex_env_dynamic_offset = 0;
u64 m_vertex_layout_dynamic_offset = 0;
u64 m_fragment_constants_dynamic_offset = 0;
std::array<vk::frame_context_t, VK_MAX_ASYNC_FRAMES> frame_context_storage;
//Temp frame context to use if the real frame queue is overburdened. Only used for storage