mirror of
https://github.com/RPCSX/rpcsx.git
synced 2026-01-04 15:50:10 +01:00
319 lines
8.3 KiB
C++
319 lines
8.3 KiB
C++
#include "stdafx.h"
|
|
#include "GLShaderInterpreter.h"
|
|
#include "GLGSRender.h"
|
|
#include "GLVertexProgram.h"
|
|
#include "GLFragmentProgram.h"
|
|
#include "../Common/ShaderInterpreter.h"
|
|
#include "../Common/GLSLCommon.h"
|
|
|
|
namespace gl
|
|
{
|
|
using glsl::shader;
|
|
|
|
namespace interpreter
|
|
{
|
|
// Queries the GL implementation for the number of texture image units
// available to the given shader stage and stores it in max_image_units.
//
// domain: shader stage to query. Anything other than vertex/fragment is
//         reported through rsx_log.fatal and then handled as a vertex stage.
void texture_pool_allocator::create(shader::type domain)
{
	GLenum pname;
	switch (domain)
	{
	default:
		rsx_log.fatal("Unexpected program domain %d", static_cast<int>(domain));
		// If the log call returns, continue into the vertex case on purpose so
		// that pname is never read uninitialized.
		[[fallthrough]];
	case shader::type::vertex:
		pname = GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS;
		break;
	case shader::type::fragment:
		pname = GL_MAX_TEXTURE_IMAGE_UNITS;
		break;
	}

	glGetIntegerv(pname, &max_image_units);
}
|
|
|
|
void texture_pool_allocator::allocate(int size)
|
|
{
|
|
if ((used + size) > max_image_units)
|
|
{
|
|
rsx_log.fatal("Out of image binding slots!");
|
|
}
|
|
|
|
used += size;
|
|
texture_pool pool;
|
|
pool.pool_size = size;
|
|
pools.push_back(pool);
|
|
}
|
|
}
|
|
|
|
void shader_interpreter::create()
|
|
{
|
|
texture_pools[0].create(shader::type::vertex);
|
|
texture_pools[1].create(shader::type::fragment);
|
|
|
|
build_vs();
|
|
build_fs();
|
|
|
|
program_handle.create().
|
|
attach(vs).
|
|
attach(fs).
|
|
link();
|
|
|
|
program_handle.uniforms[0] = GL_STREAM_BUFFER_START + 0;
|
|
program_handle.uniforms[1] = GL_STREAM_BUFFER_START + 1;
|
|
|
|
// Initialize texture bindings
|
|
int assigned = 0;
|
|
auto& allocator = texture_pools[1];
|
|
const char* type_names[] = { "sampler1D_array", "sampler2D_array", "samplerCube_array", "sampler3D_array" };
|
|
|
|
for (int i = 0; i < 4; ++i)
|
|
{
|
|
for (int j = 0; j < allocator.pools[i].pool_size; ++j)
|
|
{
|
|
allocator.pools[i].allocate(assigned++);
|
|
}
|
|
|
|
program_handle.uniforms[type_names[i]] = allocator.pools[i].allocated;
|
|
}
|
|
}
|
|
|
|
void shader_interpreter::destroy()
|
|
{
|
|
program_handle.remove();
|
|
vs.remove();
|
|
fs.remove();
|
|
}
|
|
|
|
// Returns a non-owning pointer to the interpreter's program object.
glsl::program* shader_interpreter::get()
{
	glsl::program* const interpreter_program = &program_handle;
	return interpreter_program;
}
|
|
|
|
void shader_interpreter::build_vs()
|
|
{
|
|
::glsl::shader_properties properties{};
|
|
properties.domain = ::glsl::program_domain::glsl_vertex_program;
|
|
properties.require_lit_emulation = true;
|
|
|
|
// TODO: Extend decompiler thread
|
|
// TODO: Rename decompiler thread, it no longer spawns a thread
|
|
RSXVertexProgram null_prog;
|
|
std::string shader_str;
|
|
ParamArray arr;
|
|
GLVertexDecompilerThread comp(null_prog, shader_str, arr);
|
|
|
|
std::stringstream builder;
|
|
comp.insertHeader(builder);
|
|
|
|
builder << "#define Z_NEGATIVE_ONE_TO_ONE\n\n";
|
|
|
|
comp.insertConstants(builder, {});
|
|
comp.insertInputs(builder, {});
|
|
|
|
// Insert vp stream input
|
|
builder << "\n"
|
|
"layout(std140, binding = " << GL_INTERPRETER_VERTEX_BLOCK << ") readonly restrict buffer VertexInstructionBlock\n"
|
|
"{\n"
|
|
" uint base_address;\n"
|
|
" uint entry;\n"
|
|
" uint output_mask;\n"
|
|
" uint reserved;\n"
|
|
" uvec4 vp_instructions[];\n"
|
|
"};\n\n";
|
|
|
|
::glsl::insert_glsl_legacy_function(builder, properties);
|
|
::glsl::insert_vertex_input_fetch(builder, ::glsl::glsl_rules::glsl_rules_opengl4);
|
|
|
|
builder << program_common::interpreter::get_vertex_interpreter();
|
|
const std::string s = builder.str();
|
|
|
|
vs.create(glsl::shader::type::vertex);
|
|
vs.source(s);
|
|
vs.compile();
|
|
}
|
|
|
|
void shader_interpreter::build_fs()
|
|
{
|
|
// Allocate TIUs
|
|
auto& allocator = texture_pools[1];
|
|
if (allocator.max_image_units >= 32)
|
|
{
|
|
// 16 + 4 + 4 + 4
|
|
allocator.allocate(4); // 1D
|
|
allocator.allocate(16); // 2D
|
|
allocator.allocate(4); // CUBE
|
|
allocator.allocate(4); // 3D
|
|
}
|
|
else if (allocator.max_image_units >= 24)
|
|
{
|
|
// 16 + 4 + 2 + 2
|
|
allocator.allocate(2); // 1D
|
|
allocator.allocate(16); // 2D
|
|
allocator.allocate(2); // CUBE
|
|
allocator.allocate(4); // 3D
|
|
}
|
|
else if (allocator.max_image_units >= 16)
|
|
{
|
|
// 10 + 2 + 2 + 2
|
|
allocator.allocate(2); // 1D
|
|
allocator.allocate(10); // 2D
|
|
allocator.allocate(2); // CUBE
|
|
allocator.allocate(2); // 3D
|
|
}
|
|
else
|
|
{
|
|
// Unusable
|
|
rsx_log.fatal("Failed to allocate enough TIUs for shader interpreter.");
|
|
}
|
|
|
|
::glsl::shader_properties properties{};
|
|
properties.domain = ::glsl::program_domain::glsl_fragment_program;
|
|
properties.require_depth_conversion = true;
|
|
properties.require_wpos = true;
|
|
|
|
u32 len;
|
|
ParamArray arr;
|
|
std::string shader_str;
|
|
RSXFragmentProgram frag;
|
|
GLFragmentDecompilerThread comp(shader_str, arr, frag, len);
|
|
|
|
std::stringstream builder;
|
|
builder <<
|
|
"#version 450\n"
|
|
"#extension GL_ARB_bindless_texture : require\n\n";
|
|
|
|
::glsl::insert_subheader_block(builder);
|
|
comp.insertConstants(builder);
|
|
|
|
const char* type_names[] = { "sampler1D", "sampler2D", "samplerCube", "sampler3D" };
|
|
for (int i = 0; i < 4; ++i)
|
|
{
|
|
builder << "uniform " << type_names[i] << " " << type_names[i] << "_array[" << allocator.pools[i].pool_size << "];\n";
|
|
}
|
|
|
|
builder << "\n"
|
|
"#define IS_TEXTURE_RESIDENT(index) (texture_handles[index] < 0xFF)\n"
|
|
"#define SAMPLER1D(index) sampler1D_array[texture_handles[index]]\n"
|
|
"#define SAMPLER2D(index) sampler2D_array[texture_handles[index]]\n"
|
|
"#define SAMPLER3D(index) sampler3D_array[texture_handles[index]]\n"
|
|
"#define SAMPLERCUBE(index) samplerCube_array[texture_handles[index]]\n\n";
|
|
|
|
builder <<
|
|
"layout(std430, binding =" << GL_INTERPRETER_FRAGMENT_BLOCK << ") readonly restrict buffer FragmentInstructionBlock\n"
|
|
"{\n"
|
|
" uint shader_control;\n"
|
|
" uint texture_control;\n"
|
|
" uint reserved1;\n"
|
|
" uint reserved2;\n"
|
|
" uint texture_handles[16];\n"
|
|
" uvec4 fp_instructions[];\n"
|
|
"};\n\n";
|
|
|
|
::program_common::insert_fog_declaration(builder, "vec4", "fogc", true);
|
|
|
|
builder << program_common::interpreter::get_fragment_interpreter();
|
|
const std::string s = builder.str();
|
|
|
|
fs.create(glsl::shader::type::fragment);
|
|
fs.source(s);
|
|
fs.compile();
|
|
}
|
|
|
|
void shader_interpreter::update_fragment_textures(
|
|
const std::array<std::unique_ptr<rsx::sampled_image_descriptor_base>, 16>& descriptors,
|
|
u16 reference_mask, u32* out)
|
|
{
|
|
if (reference_mask == 0)
|
|
{
|
|
return;
|
|
}
|
|
|
|
// Reset allocation
|
|
auto& allocator = texture_pools[1];
|
|
for (unsigned i = 0; i < 4; ++i)
|
|
{
|
|
allocator.pools[i].num_used = 0;
|
|
allocator.pools[i].flags = 0;
|
|
}
|
|
|
|
rsx::simple_array<std::pair<int, int>> replacement_map;
|
|
for (int i = 0; i < rsx::limits::fragment_textures_count; ++i)
|
|
{
|
|
if (reference_mask & (1 << i))
|
|
{
|
|
auto sampler_state = static_cast<gl::texture_cache::sampled_image_descriptor*>(descriptors[i].get());
|
|
verify(HERE), sampler_state;
|
|
|
|
int pool_id = static_cast<int>(sampler_state->image_type);
|
|
auto& pool = allocator.pools[pool_id];
|
|
|
|
const int old = pool.allocated[pool.num_used];
|
|
if (!pool.allocate(i))
|
|
{
|
|
rsx_log.error("Could not allocate texture resource for shader interpreter.");
|
|
break;
|
|
}
|
|
|
|
out[i] = (pool.num_used - 1);
|
|
if (old != i)
|
|
{
|
|
// Check if the candidate target has also been replaced
|
|
bool found = false;
|
|
for (auto& e : replacement_map)
|
|
{
|
|
if (e.second == old)
|
|
{
|
|
// This replacement consumed this 'old' value
|
|
e.second = i;
|
|
found = true;
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (!found)
|
|
{
|
|
replacement_map.push_back({ old, i });
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
out[i] = 0xFF;
|
|
}
|
|
}
|
|
|
|
// Bind TIU locations
|
|
if (replacement_map.empty()) [[likely]]
|
|
{
|
|
return;
|
|
}
|
|
|
|
if (get_driver_caps().vendor_AMD)
|
|
{
|
|
// AMD drivers don't like texture bindings overlapping which means workarounds are needed
|
|
// Technically this is accurate to spec, but makes efficient usage of shader resources difficult
|
|
for (unsigned i = 0; i < replacement_map.size(); ++i)
|
|
{
|
|
for (int j = 0; j < 4; ++j)
|
|
{
|
|
auto& pool = allocator.pools[j];
|
|
for (int k = pool.num_used; k < pool.pool_size; ++k)
|
|
{
|
|
if (pool.allocated[k] == replacement_map[i].second)
|
|
{
|
|
pool.allocated[k] = replacement_map[i].first;
|
|
pool.flags |= static_cast<u32>(interpreter::texture_pool_flags::dirty);
|
|
|
|
// Exit nested loop
|
|
j = 4;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
if (allocator.pools[0].flags) program_handle.uniforms["sampler1D_array"] = allocator.pools[0].allocated;
|
|
if (allocator.pools[1].flags) program_handle.uniforms["sampler2D_array"] = allocator.pools[1].allocated;
|
|
if (allocator.pools[2].flags) program_handle.uniforms["samplerCube_array"] = allocator.pools[2].allocated;
|
|
if (allocator.pools[3].flags) program_handle.uniforms["sampler3D_array"] = allocator.pools[3].allocated;
|
|
}
|
|
}
|