- Separate displayed statistics from actual backend statistics. Allows asynchronous flipping to work correctly as it just uses display stats. The real stats are used by the frame scope marker to determine behavior like engaging the FIFO optimizer or skipping draw calls correctly.
#include "stdafx.h"
#include "Emu/Memory/vm.h"
#include "Emu/System.h"
#include "GLGSRender.h"
#include "GLVertexProgram.h"
#include "../rsx_methods.h"
#include "../Common/BufferUtils.h"
#include "../rsx_utils.h"

#define DUMP_VERTEX_DATA 0

namespace
{
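	// Largest raw value representable by each depth buffer format (16-bit for z16,
	// 24-bit depth portion for z24s8); clear_surface() divides raw clear values by
	// this to get the normalized [0, 1] depth that GL expects.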
	u32 get_max_depth_value(rsx::surface_depth_format format)
	{
		switch (format)
		{
		case rsx::surface_depth_format::z16: return 0xFFFF;
		case rsx::surface_depth_format::z24s8: return 0xFFFFFF;
		}
		fmt::throw_exception("Unknown depth format" HERE);
	}
}

u64 GLGSRender::get_cycles()
{
	return thread_ctrl::get_cycles(static_cast<named_thread<GLGSRender>&>(*this));
}

GLGSRender::GLGSRender() : GSRender()
{
	m_shaders_cache = std::make_unique<gl::shader_cache>(m_prog_buffer, "opengl", "v1.91");

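	// Use a no-op vertex cache when caching is disabled or when multithreaded RSX
	// is active; otherwise use the weakly-referenced cache.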
	if (g_cfg.video.disable_vertex_cache || g_cfg.video.multithreaded_rsx)
		m_vertex_cache = std::make_unique<gl::null_vertex_cache>();
	else
		m_vertex_cache = std::make_unique<gl::weak_vertex_cache>();

	supports_hw_a2c = false;
	supports_multidraw = true;
}

extern CellGcmContextData current_context;

namespace
{
	GLenum comparison_op(rsx::comparison_function op)
	{
		switch (op)
		{
		case rsx::comparison_function::never: return GL_NEVER;
		case rsx::comparison_function::less: return GL_LESS;
		case rsx::comparison_function::equal: return GL_EQUAL;
		case rsx::comparison_function::less_or_equal: return GL_LEQUAL;
		case rsx::comparison_function::greater: return GL_GREATER;
		case rsx::comparison_function::not_equal: return GL_NOTEQUAL;
		case rsx::comparison_function::greater_or_equal: return GL_GEQUAL;
		case rsx::comparison_function::always: return GL_ALWAYS;
		}
		fmt::throw_exception("Unsupported comparison op 0x%X" HERE, (u32)op);
	}

	GLenum stencil_op(rsx::stencil_op op)
	{
		switch (op)
		{
		case rsx::stencil_op::invert: return GL_INVERT;
		case rsx::stencil_op::keep: return GL_KEEP;
		case rsx::stencil_op::zero: return GL_ZERO;
		case rsx::stencil_op::replace: return GL_REPLACE;
		case rsx::stencil_op::incr: return GL_INCR;
		case rsx::stencil_op::decr: return GL_DECR;
		case rsx::stencil_op::incr_wrap: return GL_INCR_WRAP;
		case rsx::stencil_op::decr_wrap: return GL_DECR_WRAP;
		}
		fmt::throw_exception("Unsupported stencil op 0x%X" HERE, (u32)op);
	}

	GLenum blend_equation(rsx::blend_equation op)
	{
		switch (op)
		{
		// Note: GL's FUNC_ADD may already behave as a signed add; the signed variants below fall through to their unsigned equivalents
		case rsx::blend_equation::add_signed:
			LOG_TRACE(RSX, "blend equation add_signed used. Emulating using FUNC_ADD");
		case rsx::blend_equation::add: return GL_FUNC_ADD;
		case rsx::blend_equation::min: return GL_MIN;
		case rsx::blend_equation::max: return GL_MAX;
		case rsx::blend_equation::substract: return GL_FUNC_SUBTRACT;
		case rsx::blend_equation::reverse_substract_signed:
			LOG_TRACE(RSX, "blend equation reverse_subtract_signed used. Emulating using FUNC_REVERSE_SUBTRACT");
		case rsx::blend_equation::reverse_substract: return GL_FUNC_REVERSE_SUBTRACT;
		case rsx::blend_equation::reverse_add_signed:
		default:
			LOG_ERROR(RSX, "Blend equation 0x%X is unimplemented!", (u32)op);
			return GL_FUNC_ADD;
		}
	}

	GLenum blend_factor(rsx::blend_factor op)
	{
		switch (op)
		{
		case rsx::blend_factor::zero: return GL_ZERO;
		case rsx::blend_factor::one: return GL_ONE;
		case rsx::blend_factor::src_color: return GL_SRC_COLOR;
		case rsx::blend_factor::one_minus_src_color: return GL_ONE_MINUS_SRC_COLOR;
		case rsx::blend_factor::dst_color: return GL_DST_COLOR;
		case rsx::blend_factor::one_minus_dst_color: return GL_ONE_MINUS_DST_COLOR;
		case rsx::blend_factor::src_alpha: return GL_SRC_ALPHA;
		case rsx::blend_factor::one_minus_src_alpha: return GL_ONE_MINUS_SRC_ALPHA;
		case rsx::blend_factor::dst_alpha: return GL_DST_ALPHA;
		case rsx::blend_factor::one_minus_dst_alpha: return GL_ONE_MINUS_DST_ALPHA;
		case rsx::blend_factor::src_alpha_saturate: return GL_SRC_ALPHA_SATURATE;
		case rsx::blend_factor::constant_color: return GL_CONSTANT_COLOR;
		case rsx::blend_factor::one_minus_constant_color: return GL_ONE_MINUS_CONSTANT_COLOR;
		case rsx::blend_factor::constant_alpha: return GL_CONSTANT_ALPHA;
		case rsx::blend_factor::one_minus_constant_alpha: return GL_ONE_MINUS_CONSTANT_ALPHA;
		}
		fmt::throw_exception("Unsupported blend factor 0x%X" HERE, (u32)op);
	}

	GLenum logic_op(rsx::logic_op op)
	{
		switch (op)
		{
		case rsx::logic_op::logic_clear: return GL_CLEAR;
		case rsx::logic_op::logic_and: return GL_AND;
		case rsx::logic_op::logic_and_reverse: return GL_AND_REVERSE;
		case rsx::logic_op::logic_copy: return GL_COPY;
		case rsx::logic_op::logic_and_inverted: return GL_AND_INVERTED;
		case rsx::logic_op::logic_noop: return GL_NOOP;
		case rsx::logic_op::logic_xor: return GL_XOR;
		case rsx::logic_op::logic_or: return GL_OR;
		case rsx::logic_op::logic_nor: return GL_NOR;
		case rsx::logic_op::logic_equiv: return GL_EQUIV;
		case rsx::logic_op::logic_invert: return GL_INVERT;
		case rsx::logic_op::logic_or_reverse: return GL_OR_REVERSE;
		case rsx::logic_op::logic_copy_inverted: return GL_COPY_INVERTED;
		case rsx::logic_op::logic_or_inverted: return GL_OR_INVERTED;
		case rsx::logic_op::logic_nand: return GL_NAND;
		case rsx::logic_op::logic_set: return GL_SET;
		}
		fmt::throw_exception("Unsupported logic op 0x%X" HERE, (u32)op);
	}

	GLenum front_face(rsx::front_face op)
	{
		// NOTE: RSX face winding is always based off of upper-left corner like vulkan, but GL is bottom left
		// shader_window_origin register does not affect this
		// verified with Outrun Online Arcade (window_origin::top) and DS2 (window_origin::bottom)
		// correctness of face winding checked using stencil test (GOW collection shadows)
		switch (op)
		{
		case rsx::front_face::cw: return GL_CCW;
		case rsx::front_face::ccw: return GL_CW;
		}
		fmt::throw_exception("Unsupported front face 0x%X" HERE, (u32)op);
	}

	GLenum cull_face(rsx::cull_face op)
	{
		switch (op)
		{
		case rsx::cull_face::front: return GL_FRONT;
		case rsx::cull_face::back: return GL_BACK;
		case rsx::cull_face::front_and_back: return GL_FRONT_AND_BACK;
		}
		fmt::throw_exception("Unsupported cull face 0x%X" HERE, (u32)op);
	}
}

void GLGSRender::begin()
{
	rsx::thread::begin();

	if (skip_current_frame ||
		(conditional_render_enabled && conditional_render_test_failed))
		return;

	init_buffers(rsx::framebuffer_creation_context::context_draw);
}

void GLGSRender::end()
{
	m_profiler.start();

	if (skip_current_frame || !framebuffer_status_valid ||
		(conditional_render_enabled && conditional_render_test_failed))
	{
		execute_nop_draw();
		rsx::thread::end();
		return;
	}

	m_frame_stats.setup_time += m_profiler.duration();

	const auto do_heap_cleanup = [this]()
	{
		if (manually_flush_ring_buffers)
		{
			m_attrib_ring_buffer->unmap();
			m_index_ring_buffer->unmap();
		}
		else
		{
			// DMA push; not needed with MAP_COHERENT
			//glMemoryBarrier(GL_CLIENT_MAPPED_BUFFER_BARRIER_BIT);
		}
	};

	gl::command_context cmd{ gl_state };
	gl::render_target *ds = std::get<1>(m_rtts.m_bound_depth_stencil);

	// Handle special memory barrier for ARGB8->D24S8 in an active DSV
	if (ds && ds->old_contents.size() == 1 &&
		ds->old_contents[0].source->get_internal_format() == gl::texture::internal_format::rgba8)
	{
		gl_state.enable(GL_FALSE, GL_SCISSOR_TEST);

		// TODO: Stencil transfer
		gl::g_hw_blitter->fast_clear_image(cmd, ds, 1.f, 0xFF);
		ds->old_contents[0].init_transfer(ds);

		m_depth_converter.run(ds->old_contents[0].src_rect(),
			ds->old_contents[0].dst_rect(),
			ds->old_contents[0].source, ds);

		ds->on_write();
	}

	// Load textures
	{
		m_profiler.start();

		std::lock_guard lock(m_sampler_mutex);
		bool update_framebuffer_sourced = false;

		if (surface_store_tag != m_rtts.cache_tag)
		{
			update_framebuffer_sourced = true;
			surface_store_tag = m_rtts.cache_tag;
		}

		for (int i = 0; i < rsx::limits::fragment_textures_count; ++i)
		{
			if (!fs_sampler_state[i])
				fs_sampler_state[i] = std::make_unique<gl::texture_cache::sampled_image_descriptor>();

			if (m_samplers_dirty || m_textures_dirty[i] ||
				(update_framebuffer_sourced && fs_sampler_state[i]->upload_context == rsx::texture_upload_context::framebuffer_storage))
			{
				auto sampler_state = static_cast<gl::texture_cache::sampled_image_descriptor*>(fs_sampler_state[i].get());

				if (rsx::method_registers.fragment_textures[i].enabled())
				{
					*sampler_state = m_gl_texture_cache.upload_texture(cmd, rsx::method_registers.fragment_textures[i], m_rtts);

					if (m_textures_dirty[i])
						m_fs_sampler_states[i].apply(rsx::method_registers.fragment_textures[i], fs_sampler_state[i].get());
				}
				else
				{
					*sampler_state = {};
				}

				m_textures_dirty[i] = false;
			}
		}

		for (int i = 0; i < rsx::limits::vertex_textures_count; ++i)
		{
			if (!vs_sampler_state[i])
				vs_sampler_state[i] = std::make_unique<gl::texture_cache::sampled_image_descriptor>();

			if (m_samplers_dirty || m_vertex_textures_dirty[i] ||
				(update_framebuffer_sourced && vs_sampler_state[i]->upload_context == rsx::texture_upload_context::framebuffer_storage))
			{
				auto sampler_state = static_cast<gl::texture_cache::sampled_image_descriptor*>(vs_sampler_state[i].get());

				if (rsx::method_registers.vertex_textures[i].enabled())
				{
					*sampler_state = m_gl_texture_cache.upload_texture(cmd, rsx::method_registers.vertex_textures[i], m_rtts);

					if (m_vertex_textures_dirty[i])
						m_vs_sampler_states[i].apply(rsx::method_registers.vertex_textures[i], vs_sampler_state[i].get());
				}
				else
					*sampler_state = {};

				m_vertex_textures_dirty[i] = false;
			}
		}

		m_samplers_dirty.store(false);

		m_frame_stats.textures_upload_time += m_profiler.duration();
	}

	// NOTE: Due to common OpenGL driver architecture, vertex data has to be uploaded as far away from the draw as possible
	// TODO: Implement shaders cache prediction to avoid uploading vertex data if draw is going to skip
	if (!load_program())
	{
		// Program is not ready, skip drawing this
		std::this_thread::yield();
		execute_nop_draw();
		// m_rtts.on_write(); - breaks games for obvious reasons
		rsx::thread::end();
		return;
	}

	// Load program execution environment
	load_program_env();

	m_frame_stats.setup_time += m_profiler.duration();

	// Bind textures and resolve external copy operations
	for (int i = 0; i < rsx::limits::fragment_textures_count; ++i)
	{
		if (current_fp_metadata.referenced_textures_mask & (1 << i))
		{
			_SelectTexture(GL_FRAGMENT_TEXTURES_START + i);

			gl::texture_view* view = nullptr;
			auto sampler_state = static_cast<gl::texture_cache::sampled_image_descriptor*>(fs_sampler_state[i].get());

			if (rsx::method_registers.fragment_textures[i].enabled() &&
				sampler_state->validate())
			{
				if (view = sampler_state->image_handle; UNLIKELY(!view))
				{
					view = m_gl_texture_cache.create_temporary_subresource(cmd, sampler_state->external_subresource_desc);
				}
			}

			if (LIKELY(view))
			{
				view->bind();

				if (current_fragment_program.redirected_textures & (1 << i))
				{
					_SelectTexture(GL_STENCIL_MIRRORS_START + i);

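					// NOTE: 0xAAE4 is a packed component remap (low byte 0xE4 = identity swizzle);
					// this view exposes the stencil aspect of the depth-stencil image for shader reads.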
					auto root_texture = static_cast<gl::viewable_image*>(view->image());
					auto stencil_view = root_texture->get_view(0xAAE4, rsx::default_remap_vector, gl::image_aspect::stencil);
					stencil_view->bind();
				}
			}
			else
			{
				auto target = gl::get_target(current_fragment_program.get_texture_dimension(i));
				glBindTexture(target, m_null_textures[target]->id());

				if (current_fragment_program.redirected_textures & (1 << i))
				{
					_SelectTexture(GL_STENCIL_MIRRORS_START + i);
					glBindTexture(target, m_null_textures[target]->id());
				}
			}
		}
	}

	for (int i = 0; i < rsx::limits::vertex_textures_count; ++i)
	{
		if (current_vp_metadata.referenced_textures_mask & (1 << i))
		{
			auto sampler_state = static_cast<gl::texture_cache::sampled_image_descriptor*>(vs_sampler_state[i].get());
			_SelectTexture(GL_VERTEX_TEXTURES_START + i);

			if (rsx::method_registers.vertex_textures[i].enabled() &&
				sampler_state->validate())
			{
				if (LIKELY(sampler_state->image_handle))
				{
					sampler_state->image_handle->bind();
				}
				else
				{
					m_gl_texture_cache.create_temporary_subresource(cmd, sampler_state->external_subresource_desc)->bind();
				}
			}
			else
			{
				glBindTexture(GL_TEXTURE_2D, GL_NONE);
			}
		}
	}

	m_frame_stats.textures_upload_time += m_profiler.duration();

	// Optionally do memory synchronization if the texture stage has not yet triggered this
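	// NOTE: The else branch below is currently dead code; the strict_rendering_mode
	// check has been hardcoded to true (the original condition is commented out).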
	if (true)//g_cfg.video.strict_rendering_mode)
	{
		gl_state.enable(GL_FALSE, GL_SCISSOR_TEST);

		if (ds) ds->write_barrier(cmd);

		for (auto &rtt : m_rtts.m_bound_render_targets)
		{
			if (auto surface = std::get<1>(rtt))
			{
				surface->write_barrier(cmd);
			}
		}
	}
	else
	{
		rsx::simple_array<int> buffers_to_clear;
		bool clear_all_color = true;
		bool clear_depth = false;

		for (int index = 0; index < 4; index++)
		{
			if (m_rtts.m_bound_render_targets[index].first)
			{
				if (!m_rtts.m_bound_render_targets[index].second->dirty())
					clear_all_color = false;
				else
					buffers_to_clear.push_back(index);
			}
		}

		if (ds && ds->dirty())
		{
			clear_depth = true;
		}

		if (clear_depth || !buffers_to_clear.empty())
		{
			gl_state.enable(GL_FALSE, GL_SCISSOR_TEST);
			GLenum mask = 0;

			if (clear_depth)
			{
				gl_state.depth_mask(GL_TRUE);
				gl_state.clear_depth(1.f);
				gl_state.clear_stencil(255);
				mask |= GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT;
			}

			if (clear_all_color)
				mask |= GL_COLOR_BUFFER_BIT;

			glClear(mask);

			if (!buffers_to_clear.empty() && !clear_all_color)
			{
				GLfloat colors[] = { 0.f, 0.f, 0.f, 0.f };
				// It is impossible for the render target to be type A or B here (clear all would have been flagged)
				for (auto &i : buffers_to_clear)
					glClearBufferfv(GL_COLOR, i, colors);
			}

			if (clear_depth)
				gl_state.depth_mask(rsx::method_registers.depth_write_enabled());
		}
	}

	// Unconditionally enable scissor test if it was disabled before
	gl_state.enable(GL_TRUE, GL_SCISSOR_TEST);

	update_draw_state();

	if (g_cfg.video.debug_output)
	{
		m_program->validate();
	}

	const GLenum draw_mode = gl::draw_mode(rsx::method_registers.current_draw_clause.primitive);
	rsx::method_registers.current_draw_clause.begin();
	int subdraw = 0;
	do
	{
		if (!subdraw)
		{
			analyse_inputs_interleaved(m_vertex_layout);
			if (!m_vertex_layout.validate())
			{
				// Execute remaining pipeline barriers with NOP draw
				do
				{
					rsx::method_registers.current_draw_clause.execute_pipeline_dependencies();
				}
				while (rsx::method_registers.current_draw_clause.next());

				rsx::method_registers.current_draw_clause.end();
				break;
			}
		}
		else
		{
			if (rsx::method_registers.current_draw_clause.execute_pipeline_dependencies() & rsx::vertex_base_changed)
			{
				// Rebase vertex bases instead of re-analyzing the vertex layout from scratch
				for (auto &info : m_vertex_layout.interleaved_blocks)
				{
					const auto vertex_base_offset = rsx::method_registers.vertex_data_base_offset();
					info.real_offset_address = rsx::get_address(rsx::get_vertex_offset_from_base(vertex_base_offset, info.base_offset), info.memory_location);
				}
			}
		}

		++subdraw;

		if (manually_flush_ring_buffers)
		{
			// Use approximations to reserve space. This path is mostly for debug purposes anyway
			u32 approx_vertex_count = rsx::method_registers.current_draw_clause.get_elements_count();
			u32 approx_working_buffer_size = approx_vertex_count * 256;

			// Allocate 256K heap if we have no approximation at this time (inlined array)
			m_attrib_ring_buffer->reserve_storage_on_heap(std::max(approx_working_buffer_size, 256 * 1024U));
			m_index_ring_buffer->reserve_storage_on_heap(16 * 1024);
		}

		// Do vertex upload before RTT prep / texture lookups to give the driver time to push data
		auto upload_info = set_vertex_buffer();
		do_heap_cleanup();

		if (upload_info.vertex_draw_count == 0)
		{
			// Malformed vertex setup; abort
			continue;
		}

		update_vertex_env(upload_info);

		if (!upload_info.index_info)
		{
			if (rsx::method_registers.current_draw_clause.is_single_draw())
			{
				glDrawArrays(draw_mode, 0, upload_info.vertex_draw_count);
			}
			else
			{
				const auto subranges = rsx::method_registers.current_draw_clause.get_subranges();
				const auto draw_count = subranges.size();
				const auto driver_caps = gl::get_driver_caps();
				bool use_draw_arrays_fallback = false;

				m_scratch_buffer.resize(draw_count * 24);
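				// The scratch buffer is carved into three back-to-back arrays below:
				// firsts[draw_count], counts[draw_count] and offsets[draw_count].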
				GLint* firsts = (GLint*)m_scratch_buffer.data();
				GLsizei* counts = (GLsizei*)(firsts + draw_count);
				const GLvoid** offsets = (const GLvoid**)(counts + draw_count);

				u32 first = 0;
				u32 dst_index = 0;
				for (const auto &range : subranges)
				{
					firsts[dst_index] = first;
					counts[dst_index] = range.count;
					offsets[dst_index++] = (const GLvoid*)(u64(first << 2));

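					// The offsets written above are byte offsets into the u32 identity index buffer (hence first * 4).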
					if (driver_caps.vendor_AMD && (first + range.count) > (0x100000 >> 2))
					{
						// Unlikely, but added here in case the identity buffer is not large enough somehow
						use_draw_arrays_fallback = true;
						break;
					}

					first += range.count;
				}

				if (use_draw_arrays_fallback)
				{
					// MultiDrawArrays is broken on some primitive types using AMD. One known type is GL_TRIANGLE_STRIP but there could be more
					for (u32 n = 0; n < draw_count; ++n)
					{
						glDrawArrays(draw_mode, firsts[n], counts[n]);
					}
				}
				else if (driver_caps.vendor_AMD)
				{
					// Use identity index buffer to fix broken vertexID on AMD
					m_identity_index_buffer->bind();
					glMultiDrawElements(draw_mode, counts, GL_UNSIGNED_INT, offsets, (GLsizei)draw_count);
				}
				else
				{
					// Normal render
					glMultiDrawArrays(draw_mode, firsts, counts, (GLsizei)draw_count);
				}
			}
		}
		else
		{
			const GLenum index_type = std::get<0>(*upload_info.index_info);
			const u32 index_offset = std::get<1>(*upload_info.index_info);
			const bool restarts_valid = gl::is_primitive_native(rsx::method_registers.current_draw_clause.primitive) && !rsx::method_registers.current_draw_clause.is_disjoint_primitive;

			if (gl_state.enable(restarts_valid && rsx::method_registers.restart_index_enabled(), GL_PRIMITIVE_RESTART))
			{
				glPrimitiveRestartIndex((index_type == GL_UNSIGNED_SHORT) ? 0xffff : 0xffffffff);
			}

			m_index_ring_buffer->bind();

			if (rsx::method_registers.current_draw_clause.is_single_draw())
			{
				glDrawElements(draw_mode, upload_info.vertex_draw_count, index_type, (GLvoid *)(uintptr_t)index_offset);
			}
			else
			{
				const auto subranges = rsx::method_registers.current_draw_clause.get_subranges();
				const auto draw_count = subranges.size();
				const u32 type_scale = (index_type == GL_UNSIGNED_SHORT) ? 1 : 2;
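				// Shift converting an index count into a byte size: <<1 for 16-bit indices, <<2 for 32-bit.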
				uintptr_t index_ptr = index_offset;
				m_scratch_buffer.resize(draw_count * 16);

				GLsizei *counts = (GLsizei*)m_scratch_buffer.data();
				const GLvoid** offsets = (const GLvoid**)(counts + draw_count);
				int dst_index = 0;

				for (const auto &range : subranges)
				{
					const auto index_size = get_index_count(rsx::method_registers.current_draw_clause.primitive, range.count);
					counts[dst_index] = index_size;
					offsets[dst_index++] = (const GLvoid*)index_ptr;

					index_ptr += (index_size << type_scale);
				}

				glMultiDrawElements(draw_mode, counts, index_type, offsets, (GLsizei)draw_count);
			}
		}
	} while (rsx::method_registers.current_draw_clause.next());

	m_rtts.on_write(m_framebuffer_layout.color_write_enabled.data(), m_framebuffer_layout.zeta_write_enabled);

	m_attrib_ring_buffer->notify();
	m_index_ring_buffer->notify();
	m_fragment_env_buffer->notify();
	m_vertex_env_buffer->notify();
	m_texture_parameters_buffer->notify();
	m_vertex_layout_buffer->notify();
	m_fragment_constants_buffer->notify();
	m_transform_constants_buffer->notify();

	m_frame_stats.textures_upload_time += m_profiler.duration();

	rsx::thread::end();
}

void GLGSRender::set_viewport()
{
	// NOTE: scale offset matrix already contains the viewport transformation
	const auto clip_width = rsx::apply_resolution_scale(rsx::method_registers.surface_clip_width(), true);
	const auto clip_height = rsx::apply_resolution_scale(rsx::method_registers.surface_clip_height(), true);
	glViewport(0, 0, clip_width, clip_height);
}

void GLGSRender::set_scissor(bool clip_viewport)
{
	areau scissor;
	if (get_scissor(scissor, clip_viewport))
	{
		// NOTE: window origin does not affect scissor region (probably only affects viewport matrix; already applied)
		// See LIMBO [NPUB-30373] which uses shader window origin = top
		glScissor(scissor.x1, scissor.y1, scissor.width(), scissor.height());
		gl_state.enable(GL_TRUE, GL_SCISSOR_TEST);
	}
}

void GLGSRender::on_init_thread()
{
	verify(HERE), m_frame;

	// NOTES: All contexts have to be created before any is bound to a thread
	// This allows context sharing to work (both GLRCs passed to wglShareLists have to be idle or you get ERROR_BUSY)
	m_context = m_frame->make_context();

	if (!g_cfg.video.disable_asynchronous_shader_compiler)
	{
		m_decompiler_context = m_frame->make_context();
	}

	// Bind primary context to main RSX thread
	m_frame->set_current(m_context);

	zcull_ctrl.reset(static_cast<::rsx::reports::ZCULL_control*>(this));

	gl::init();

	// Enable adaptive vsync if vsync is requested
	gl::set_swapinterval(g_cfg.video.vsync ? -1 : 0);

	if (g_cfg.video.debug_output)
		gl::enable_debugging();

	LOG_NOTICE(RSX, "GL RENDERER: %s (%s)", (const char*)glGetString(GL_RENDERER), (const char*)glGetString(GL_VENDOR));
	LOG_NOTICE(RSX, "GL VERSION: %s", (const char*)glGetString(GL_VERSION));
	LOG_NOTICE(RSX, "GLSL VERSION: %s", (const char*)glGetString(GL_SHADING_LANGUAGE_VERSION));

	auto& gl_caps = gl::get_driver_caps();

	if (!gl_caps.ARB_texture_buffer_supported)
	{
		fmt::throw_exception("Failed to initialize OpenGL renderer. ARB_texture_buffer_object is required but not supported by your GPU");
	}

	if (!gl_caps.ARB_dsa_supported && !gl_caps.EXT_dsa_supported)
	{
		fmt::throw_exception("Failed to initialize OpenGL renderer. ARB_direct_state_access or EXT_direct_state_access is required but not supported by your GPU");
	}

	if (!gl_caps.ARB_depth_buffer_float_supported && g_cfg.video.force_high_precision_z_buffer)
	{
		LOG_WARNING(RSX, "High precision Z buffer requested but your GPU does not support GL_ARB_depth_buffer_float. Option ignored.");
	}

	if (!gl_caps.ARB_texture_barrier_supported && !gl_caps.NV_texture_barrier_supported && !g_cfg.video.strict_rendering_mode)
	{
		LOG_WARNING(RSX, "Texture barriers are not supported by your GPU. Feedback loops will have undefined results.");
	}

	// Use industry standard resource alignment values as defaults
	m_uniform_buffer_offset_align = 256;
	m_min_texbuffer_alignment = 256;
	m_max_texbuffer_size = 0;

	glEnable(GL_VERTEX_PROGRAM_POINT_SIZE);
	glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &m_uniform_buffer_offset_align);
	glGetIntegerv(GL_TEXTURE_BUFFER_OFFSET_ALIGNMENT, &m_min_texbuffer_alignment);
	glGetIntegerv(GL_MAX_TEXTURE_BUFFER_SIZE, &m_max_texbuffer_size);
	m_vao.create();

	// Set min alignment to 16-bytes for SSE optimizations with aligned addresses to work
	m_min_texbuffer_alignment = std::max(m_min_texbuffer_alignment, 16);
	m_uniform_buffer_offset_align = std::max(m_uniform_buffer_offset_align, 16);

	LOG_NOTICE(RSX, "Supported texel buffer size reported: %d bytes", m_max_texbuffer_size);
	if (m_max_texbuffer_size < (16 * 0x100000))
	{
		LOG_ERROR(RSX, "Max texture buffer size supported is less than 16M which is useless. Expect undefined behaviour.");
		m_max_texbuffer_size = (16 * 0x100000);
	}

	// Array stream buffer
	{
		m_gl_persistent_stream_buffer = std::make_unique<gl::texture>(GL_TEXTURE_BUFFER, 0, 0, 0, 0, GL_R8UI);
		_SelectTexture(GL_STREAM_BUFFER_START + 0);
		glBindTexture(GL_TEXTURE_BUFFER, m_gl_persistent_stream_buffer->id());
	}

	// Register stream buffer
	{
		m_gl_volatile_stream_buffer = std::make_unique<gl::texture>(GL_TEXTURE_BUFFER, 0, 0, 0, 0, GL_R8UI);
		_SelectTexture(GL_STREAM_BUFFER_START + 1);
		glBindTexture(GL_TEXTURE_BUFFER, m_gl_volatile_stream_buffer->id());
	}

	// Fallback null texture instead of relying on texture0
	{
		std::vector<u32> pixeldata = { 0, 0, 0, 0 };

		// 1D
		auto tex1D = std::make_unique<gl::texture>(GL_TEXTURE_1D, 1, 1, 1, 1, GL_RGBA8);
		tex1D->copy_from(pixeldata.data(), gl::texture::format::rgba, gl::texture::type::uint_8_8_8_8);

		// 2D
		auto tex2D = std::make_unique<gl::texture>(GL_TEXTURE_2D, 1, 1, 1, 1, GL_RGBA8);
		tex2D->copy_from(pixeldata.data(), gl::texture::format::rgba, gl::texture::type::uint_8_8_8_8);

		// 3D
		auto tex3D = std::make_unique<gl::texture>(GL_TEXTURE_3D, 1, 1, 1, 1, GL_RGBA8);
		tex3D->copy_from(pixeldata.data(), gl::texture::format::rgba, gl::texture::type::uint_8_8_8_8);

		// CUBE
		auto texCUBE = std::make_unique<gl::texture>(GL_TEXTURE_CUBE_MAP, 1, 1, 1, 1, GL_RGBA8);
		texCUBE->copy_from(pixeldata.data(), gl::texture::format::rgba, gl::texture::type::uint_8_8_8_8);

		m_null_textures[GL_TEXTURE_1D] = std::move(tex1D);
		m_null_textures[GL_TEXTURE_2D] = std::move(tex2D);
		m_null_textures[GL_TEXTURE_3D] = std::move(tex3D);
		m_null_textures[GL_TEXTURE_CUBE_MAP] = std::move(texCUBE);
	}

	if (!gl_caps.ARB_buffer_storage_supported)
	{
		LOG_WARNING(RSX, "Forcing use of legacy OpenGL buffers because ARB_buffer_storage is not supported");
		// TODO: do not modify config options
		g_cfg.video.gl_legacy_buffers.from_string("true");
	}

	if (g_cfg.video.gl_legacy_buffers)
	{
LOG_WARNING(RSX, "Using legacy openGL buffers.");
|
|
		manually_flush_ring_buffers = true;

		m_attrib_ring_buffer = std::make_unique<gl::legacy_ring_buffer>();
		m_transform_constants_buffer = std::make_unique<gl::legacy_ring_buffer>();
		m_fragment_constants_buffer = std::make_unique<gl::legacy_ring_buffer>();
		m_fragment_env_buffer = std::make_unique<gl::legacy_ring_buffer>();
		m_vertex_env_buffer = std::make_unique<gl::legacy_ring_buffer>();
		m_texture_parameters_buffer = std::make_unique<gl::legacy_ring_buffer>();
		m_vertex_layout_buffer = std::make_unique<gl::legacy_ring_buffer>();
		m_index_ring_buffer = std::make_unique<gl::legacy_ring_buffer>();
	}
	else
	{
		m_attrib_ring_buffer = std::make_unique<gl::ring_buffer>();
		m_transform_constants_buffer = std::make_unique<gl::ring_buffer>();
		m_fragment_constants_buffer = std::make_unique<gl::ring_buffer>();
		m_fragment_env_buffer = std::make_unique<gl::ring_buffer>();
		m_vertex_env_buffer = std::make_unique<gl::ring_buffer>();
		m_texture_parameters_buffer = std::make_unique<gl::ring_buffer>();
		m_vertex_layout_buffer = std::make_unique<gl::ring_buffer>();
		m_index_ring_buffer = std::make_unique<gl::ring_buffer>();
	}

	m_attrib_ring_buffer->create(gl::buffer::target::texture, 256 * 0x100000);
	m_index_ring_buffer->create(gl::buffer::target::element_array, 64 * 0x100000);
	m_transform_constants_buffer->create(gl::buffer::target::uniform, 64 * 0x100000);
	m_fragment_constants_buffer->create(gl::buffer::target::uniform, 16 * 0x100000);
	m_fragment_env_buffer->create(gl::buffer::target::uniform, 16 * 0x100000);
	m_vertex_env_buffer->create(gl::buffer::target::uniform, 16 * 0x100000);
	m_texture_parameters_buffer->create(gl::buffer::target::uniform, 16 * 0x100000);
	m_vertex_layout_buffer->create(gl::buffer::target::uniform, 16 * 0x100000);

	if (gl_caps.vendor_AMD)
	{
		m_identity_index_buffer = std::make_unique<gl::buffer>();
		m_identity_index_buffer->create(gl::buffer::target::element_array, 1 * 0x100000, nullptr, gl::buffer::memory_type::host_visible);

		// Initialize with 256k identity entries
		auto *dst = (u32*)m_identity_index_buffer->map(gl::buffer::access::write);
		for (u32 n = 0; n < (0x100000 >> 2); ++n)
		{
			dst[n] = n;
		}

		m_identity_index_buffer->unmap();
	}

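	// Expose the attribute ring buffer to shaders as two R8UI texel buffer views
	// (persistent and volatile vertex streams), clamped to the max supported texel buffer size.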
	m_persistent_stream_view.update(m_attrib_ring_buffer.get(), 0, std::min<u32>((u32)m_attrib_ring_buffer->size(), m_max_texbuffer_size));
	m_volatile_stream_view.update(m_attrib_ring_buffer.get(), 0, std::min<u32>((u32)m_attrib_ring_buffer->size(), m_max_texbuffer_size));
	m_gl_persistent_stream_buffer->copy_from(m_persistent_stream_view);
	m_gl_volatile_stream_buffer->copy_from(m_volatile_stream_view);

	m_vao.element_array_buffer = *m_index_ring_buffer;

	if (g_cfg.video.overlay)
	{
		if (gl_caps.ARB_shader_draw_parameters_supported)
		{
			m_text_printer.init();
			m_text_printer.set_enabled(true);
		}
	}

	int image_unit = 0;
	for (auto &sampler : m_fs_sampler_states)
	{
		sampler.create();
		sampler.bind(image_unit++);
	}

	for (auto &sampler : m_fs_sampler_mirror_states)
	{
		sampler.create();
		sampler.apply_defaults();
		sampler.bind(image_unit++);
	}

	for (auto &sampler : m_vs_sampler_states)
	{
		sampler.create();
		sampler.bind(image_unit++);
	}

	// Occlusion query
	for (u32 i = 0; i < occlusion_query_count; ++i)
	{
		GLuint handle = 0;
		auto &query = m_occlusion_query_data[i];
		glGenQueries(1, &handle);

		query.driver_handle = (u64)handle;
		query.pending = false;
		query.active = false;
		query.result = 0;
	}

	// Clip planes are shader controlled; enable all planes driver-side
	glEnable(GL_CLIP_DISTANCE0 + 0);
	glEnable(GL_CLIP_DISTANCE0 + 1);
	glEnable(GL_CLIP_DISTANCE0 + 2);
	glEnable(GL_CLIP_DISTANCE0 + 3);
	glEnable(GL_CLIP_DISTANCE0 + 4);
	glEnable(GL_CLIP_DISTANCE0 + 5);

	m_depth_converter.create();
	m_ui_renderer.create();
	m_video_output_pass.create();

	m_gl_texture_cache.initialize();

	if (!m_overlay_manager)
	{
		m_frame->hide();
		m_shaders_cache->load(nullptr);
		m_frame->show();
	}
	else
	{
		struct native_helper : gl::shader_cache::progress_dialog_helper
		{
			rsx::thread *owner = nullptr;
			std::shared_ptr<rsx::overlays::message_dialog> dlg;

			native_helper(GLGSRender *ptr) :
				owner(ptr) {}

			void create() override
			{
				MsgDialogType type = {};
				type.disable_cancel = true;
				type.progress_bar_count = 2;

				dlg = g_fxo->get<rsx::overlays::display_manager>()->create<rsx::overlays::message_dialog>((bool)g_cfg.video.shader_preloading_dialog.use_custom_background);
				dlg->progress_bar_set_taskbar_index(-1);
				dlg->show("Loading precompiled shaders from disk...", type, [](s32 status)
				{
					if (status != CELL_OK)
						Emu.Stop();
				});
			}

			void update_msg(u32 index, u32 processed, u32 entry_count) override
			{
				const char *text = index == 0 ? "Loading pipeline object %u of %u" : "Compiling pipeline object %u of %u";
				dlg->progress_bar_set_message(index, fmt::format(text, processed, entry_count));
				owner->flip({});
			}

			void inc_value(u32 index, u32 value) override
			{
				dlg->progress_bar_increment(index, (f32)value);
				owner->flip({});
			}

			void set_limit(u32 index, u32 limit) override
			{
				dlg->progress_bar_set_limit(index, limit);
				owner->flip({});
			}

			void refresh() override
			{
				dlg->refresh();
			}

			void close() override
			{
				dlg->return_code = CELL_OK;
				dlg->close();
			}
		}
		helper(this);

		m_shaders_cache->load(&helper);
	}
}

void GLGSRender::on_exit()
{
	zcull_ctrl.release();

	m_prog_buffer.clear();
	m_rtts.destroy();

	for (auto &fbo : m_framebuffer_cache)
	{
		fbo.remove();
	}

	m_framebuffer_cache.clear();

	if (m_flip_fbo)
	{
		m_flip_fbo.remove();
	}

	if (m_flip_tex_color)
	{
		m_flip_tex_color.reset();
	}

	if (m_vao)
	{
		m_vao.remove();
	}

	m_gl_persistent_stream_buffer.reset();
	m_gl_volatile_stream_buffer.reset();

	for (auto &sampler : m_fs_sampler_states)
	{
		sampler.remove();
	}

	for (auto &sampler : m_fs_sampler_mirror_states)
	{
		sampler.remove();
	}

	for (auto &sampler : m_vs_sampler_states)
	{
		sampler.remove();
	}

	if (m_attrib_ring_buffer)
	{
		m_attrib_ring_buffer->remove();
	}

	if (m_transform_constants_buffer)
	{
		m_transform_constants_buffer->remove();
	}

	if (m_fragment_constants_buffer)
	{
		m_fragment_constants_buffer->remove();
	}

	if (m_fragment_env_buffer)
	{
		m_fragment_env_buffer->remove();
	}

	if (m_vertex_env_buffer)
	{
		m_vertex_env_buffer->remove();
	}

	if (m_texture_parameters_buffer)
	{
		m_texture_parameters_buffer->remove();
	}

	if (m_vertex_layout_buffer)
	{
		m_vertex_layout_buffer->remove();
	}

	if (m_index_ring_buffer)
	{
		m_index_ring_buffer->remove();
	}

	if (m_identity_index_buffer)
	{
		m_identity_index_buffer->remove();
	}

	m_null_textures.clear();
	m_text_printer.close();
	m_gl_texture_cache.destroy();
	m_depth_converter.destroy();
	m_ui_renderer.destroy();
	m_video_output_pass.destroy();

	for (u32 i = 0; i < occlusion_query_count; ++i)
	{
		auto &query = m_occlusion_query_data[i];
		query.active = false;
		query.pending = false;

		GLuint handle = (GLuint)query.driver_handle;
		glDeleteQueries(1, &handle);
		query.driver_handle = 0;
	}

	glFlush();
	glFinish();

	GSRender::on_exit();
}

void GLGSRender::clear_surface(u32 arg)
{
	if (skip_current_frame || !framebuffer_status_valid) return;

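	// NV4097_CLEAR_SURFACE argument bits: 0x01 = depth, 0x02 = stencil, 0xF0 = color component write mask.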
	// If stencil write mask is disabled, remove clear_stencil bit
	if (!rsx::method_registers.stencil_mask()) arg &= ~0x2u;

	// Ignore invalid clear flags
	if ((arg & 0xf3) == 0) return;

	GLbitfield mask = 0;

	gl::command_context cmd{ gl_state };
	const bool require_mem_load =
		rsx::method_registers.scissor_origin_x() > 0 ||
		rsx::method_registers.scissor_origin_y() > 0 ||
		rsx::method_registers.scissor_width() < rsx::method_registers.surface_clip_width() ||
		rsx::method_registers.scissor_height() < rsx::method_registers.surface_clip_height();

	bool update_color = false, update_z = false;
	rsx::surface_depth_format surface_depth_format = rsx::method_registers.surface_depth_fmt();

	if (auto ds = std::get<1>(m_rtts.m_bound_depth_stencil); arg & 0x3)
	{
		if (arg & 0x1)
		{
			u32 max_depth_value = get_max_depth_value(surface_depth_format);
			u32 clear_depth = rsx::method_registers.z_clear_value(surface_depth_format == rsx::surface_depth_format::z24s8);

			gl_state.depth_mask(GL_TRUE);
			gl_state.clear_depth(f32(clear_depth) / max_depth_value);
			mask |= GLenum(gl::buffers::depth);
		}

		if (surface_depth_format == rsx::surface_depth_format::z24s8)
		{
			if (arg & 0x2)
			{
				u8 clear_stencil = rsx::method_registers.stencil_clear_value();

				gl_state.stencil_mask(rsx::method_registers.stencil_mask());
				gl_state.clear_stencil(clear_stencil);
				mask |= GLenum(gl::buffers::stencil);
			}

			if ((arg & 0x3) != 0x3 && !require_mem_load && ds->dirty())
			{
				verify(HERE), mask;

				// Only one aspect was cleared. Make sure to memory initialize the other before removing the dirty flag
				if (arg == 1)
				{
					// Depth was cleared, initialize stencil
					gl_state.stencil_mask(0xFF);
					gl_state.clear_stencil(0xFF);
					mask |= GLenum(gl::buffers::stencil);
				}
				else
				{
					// Stencil was cleared, initialize depth
					gl_state.depth_mask(GL_TRUE);
					gl_state.clear_depth(1.f);
					mask |= GLenum(gl::buffers::depth);
				}
			}
		}

		if (mask)
		{
			if (require_mem_load) ds->write_barrier(cmd);

			// Memory has been initialized
			update_z = true;
		}
	}

	if (auto colormask = (arg & 0xf0))
	{
		switch (rsx::method_registers.surface_color())
		{
		case rsx::surface_color_format::x32:
		case rsx::surface_color_format::w16z16y16x16:
		case rsx::surface_color_format::w32z32y32x32:
		{
			// Nop
			break;
		}
		case rsx::surface_color_format::g8b8:
		{
			colormask = rsx::get_g8b8_r8g8_colormask(colormask);
			// Fall through
		}
		default:
		{
			u8 clear_a = rsx::method_registers.clear_color_a();
			u8 clear_r = rsx::method_registers.clear_color_r();
			u8 clear_g = rsx::method_registers.clear_color_g();
			u8 clear_b = rsx::method_registers.clear_color_b();

			gl_state.clear_color(clear_r, clear_g, clear_b, clear_a);
			mask |= GLenum(gl::buffers::color);

			for (u8 index = m_rtts.m_bound_render_targets_config.first, count = 0;
				count < m_rtts.m_bound_render_targets_config.second;
				++count, ++index)
			{
				if (require_mem_load) m_rtts.m_bound_render_targets[index].second->write_barrier(cmd);
				gl_state.color_maski(count, colormask);
			}

			update_color = true;
			break;
		}
		}
	}

	if (update_color || update_z)
	{
		const bool write_all_mask[] = { true, true, true, true };
		m_rtts.on_write(update_color ? write_all_mask : nullptr, update_z);
	}

	glClear(mask);
}

bool GLGSRender::do_method(u32 cmd, u32 arg)
{
	switch (cmd)
	{
	case NV4097_CLEAR_SURFACE:
	{
		if (arg & 0xF3)
		{
			// Only do all this if we have actual work to do
			u8 ctx = rsx::framebuffer_creation_context::context_draw;
			if (arg & 0xF0) ctx |= rsx::framebuffer_creation_context::context_clear_color;
			if (arg & 0x3) ctx |= rsx::framebuffer_creation_context::context_clear_depth;

			init_buffers((rsx::framebuffer_creation_context)ctx, true);
			clear_surface(arg);
		}

		return true;
	}
	case NV4097_CLEAR_ZCULL_SURFACE:
	{
		// NOP
		// Clearing zcull memory does not modify depth/stencil buffers 'bound' to the zcull region
		return true;
	}
	case NV4097_TEXTURE_READ_SEMAPHORE_RELEASE:
	{
		// Texture barrier, seemingly not very useful
		return true;
	}
	case NV4097_BACK_END_WRITE_SEMAPHORE_RELEASE:
	{
		//flush_draw_buffers = true;
		return true;
	}
	}

	return false;
}

bool GLGSRender::load_program()
{
	if (m_graphics_state & rsx::pipeline_state::invalidate_pipeline_bits)
	{
		get_current_fragment_program(fs_sampler_state);
		verify(HERE), current_fragment_program.valid;

		get_current_vertex_program(vs_sampler_state);

		current_vertex_program.skip_vertex_input_check = true; // not needed for us since decoding is done server side
		current_fragment_program.unnormalized_coords = 0; // unused
	}
	else if (m_program)
	{
		// Program already loaded
		return true;
	}

	void* pipeline_properties = nullptr;
	m_program = m_prog_buffer.get_graphics_pipeline(current_vertex_program, current_fragment_program, pipeline_properties,
		!g_cfg.video.disable_asynchronous_shader_compiler).get();

	if (m_prog_buffer.check_cache_missed())
	{
		if (m_prog_buffer.check_program_linked_flag())
		{
			// Program was linked or queued for linking
			m_shaders_cache->store(pipeline_properties, current_vertex_program, current_fragment_program);
		}

		// Notify the user with HUD notification
		if (g_cfg.misc.show_shader_compilation_hint)
		{
			if (m_overlay_manager)
			{
				if (auto dlg = m_overlay_manager->get<rsx::overlays::shader_compile_notification>())
				{
					// Extend duration
					dlg->touch();
				}
				else
				{
					// Create dialog but do not show immediately
					m_overlay_manager->create<rsx::overlays::shader_compile_notification>();
				}
			}
		}
	}

	return m_program != nullptr;
}

void GLGSRender::load_program_env()
{
	if (!m_program)
	{
		fmt::throw_exception("Unreachable right now" HERE);
	}

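	// UBO binding points used below (buffer 1 is bound in update_vertex_env): 0 = vertex env,
	// 1 = vertex layout, 2 = transform constants, 3 = fragment constants, 4 = fragment env,
	// 5 = fragment texture parameters.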
	const u32 fragment_constants_size = current_fp_metadata.program_constants_buffer_length;

	const bool update_transform_constants = !!(m_graphics_state & rsx::pipeline_state::transform_constants_dirty);
	const bool update_fragment_constants = !!(m_graphics_state & rsx::pipeline_state::fragment_constants_dirty) && fragment_constants_size;
	const bool update_vertex_env = !!(m_graphics_state & rsx::pipeline_state::vertex_state_dirty);
	const bool update_fragment_env = !!(m_graphics_state & rsx::pipeline_state::fragment_state_dirty);
	const bool update_fragment_texture_env = !!(m_graphics_state & rsx::pipeline_state::fragment_texture_state_dirty);

	m_program->use();

	if (manually_flush_ring_buffers)
	{
		if (update_fragment_env) m_fragment_env_buffer->reserve_storage_on_heap(128);
		if (update_vertex_env) m_vertex_env_buffer->reserve_storage_on_heap(256);
		if (update_fragment_texture_env) m_texture_parameters_buffer->reserve_storage_on_heap(256);
		if (update_fragment_constants) m_fragment_constants_buffer->reserve_storage_on_heap(align(fragment_constants_size, 256));
		if (update_transform_constants) m_transform_constants_buffer->reserve_storage_on_heap(8192);
	}

	if (update_vertex_env)
	{
		// Vertex state
		auto mapping = m_vertex_env_buffer->alloc_from_heap(144, m_uniform_buffer_offset_align);
		auto buf = static_cast<u8*>(mapping.first);
		fill_scale_offset_data(buf, false);
		fill_user_clip_data(buf + 64);
		*(reinterpret_cast<u32*>(buf + 128)) = rsx::method_registers.transform_branch_bits();
		*(reinterpret_cast<f32*>(buf + 132)) = rsx::method_registers.point_size();
		*(reinterpret_cast<f32*>(buf + 136)) = rsx::method_registers.clip_min();
		*(reinterpret_cast<f32*>(buf + 140)) = rsx::method_registers.clip_max();

		m_vertex_env_buffer->bind_range(0, mapping.second, 144);
	}

	if (update_transform_constants)
	{
		// Vertex constants
		auto mapping = m_transform_constants_buffer->alloc_from_heap(8192, m_uniform_buffer_offset_align);
		auto buf = static_cast<u8*>(mapping.first);
		fill_vertex_program_constants_data(buf);

		m_transform_constants_buffer->bind_range(2, mapping.second, 8192);
	}

	if (update_fragment_constants)
	{
		// Fragment constants
		auto mapping = m_fragment_constants_buffer->alloc_from_heap(fragment_constants_size, m_uniform_buffer_offset_align);
		auto buf = static_cast<u8*>(mapping.first);

		m_prog_buffer.fill_fragment_constants_buffer({ reinterpret_cast<float*>(buf), gsl::narrow<int>(fragment_constants_size) },
			current_fragment_program, gl::get_driver_caps().vendor_NVIDIA);

		m_fragment_constants_buffer->bind_range(3, mapping.second, fragment_constants_size);
	}

	if (update_fragment_env)
	{
		// Fragment state
		auto mapping = m_fragment_env_buffer->alloc_from_heap(32, m_uniform_buffer_offset_align);
		auto buf = static_cast<u8*>(mapping.first);
		fill_fragment_state_buffer(buf, current_fragment_program);

		m_fragment_env_buffer->bind_range(4, mapping.second, 32);
	}

	if (update_fragment_texture_env)
	{
		// Fragment texture parameters
		auto mapping = m_texture_parameters_buffer->alloc_from_heap(256, m_uniform_buffer_offset_align);
		auto buf = static_cast<u8*>(mapping.first);
		fill_fragment_texture_parameters(buf, current_fragment_program);

		m_texture_parameters_buffer->bind_range(5, mapping.second, 256);
	}

	if (manually_flush_ring_buffers)
	{
		if (update_fragment_env) m_fragment_env_buffer->unmap();
		if (update_vertex_env) m_vertex_env_buffer->unmap();
		if (update_fragment_texture_env) m_texture_parameters_buffer->unmap();
		if (update_fragment_constants) m_fragment_constants_buffer->unmap();
		if (update_transform_constants) m_transform_constants_buffer->unmap();
	}

	const u32 handled_flags = (rsx::pipeline_state::fragment_state_dirty | rsx::pipeline_state::vertex_state_dirty | rsx::pipeline_state::transform_constants_dirty | rsx::pipeline_state::fragment_constants_dirty | rsx::pipeline_state::fragment_texture_state_dirty);
	m_graphics_state &= ~handled_flags;
}

void GLGSRender::update_vertex_env(const gl::vertex_upload_info& upload_info)
{
	if (manually_flush_ring_buffers)
	{
		m_vertex_layout_buffer->reserve_storage_on_heap(128 + 16);
	}

	// Vertex layout state
	auto mapping = m_vertex_layout_buffer->alloc_from_heap(128 + 16, m_uniform_buffer_offset_align);
	auto buf = static_cast<u32*>(mapping.first);

	buf[0] = upload_info.vertex_index_base;
	buf[1] = upload_info.vertex_index_offset;
	buf += 4;

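	// buf is a u32 pointer, so += 4 skips 16 bytes: the first vec4 carries the index
	// base/offset and the layout data below starts at the next 16-byte boundary.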
	fill_vertex_layout_state(m_vertex_layout, upload_info.first_vertex, upload_info.allocated_vertex_count, (s32*)buf, upload_info.persistent_mapping_offset, upload_info.volatile_mapping_offset);

	m_vertex_layout_buffer->bind_range(1, mapping.second, 128 + 16);

	if (manually_flush_ring_buffers)
	{
		m_vertex_layout_buffer->unmap();
	}
}

void GLGSRender::update_draw_state()
{
	m_profiler.start();

	for (int index = 0; index < m_rtts.get_color_surface_count(); ++index)
	{
		bool color_mask_b = rsx::method_registers.color_mask_b(index);
		bool color_mask_g = rsx::method_registers.color_mask_g(index);
		bool color_mask_r = rsx::method_registers.color_mask_r(index);
		bool color_mask_a = rsx::method_registers.color_mask_a(index);

		if (rsx::method_registers.surface_color() == rsx::surface_color_format::g8b8)
		{
			// Map GB components onto RG
			rsx::get_g8b8_r8g8_colormask(color_mask_r, color_mask_g, color_mask_b, color_mask_a);
		}

		gl_state.color_maski(index, color_mask_r, color_mask_g, color_mask_b, color_mask_a);
	}

	gl_state.depth_mask(rsx::method_registers.depth_write_enabled());
	gl_state.stencil_mask(rsx::method_registers.stencil_mask());

	gl_state.enable(rsx::method_registers.depth_clamp_enabled() || !rsx::method_registers.depth_clip_enabled(), GL_DEPTH_CLAMP);

	if (gl_state.enable(rsx::method_registers.depth_test_enabled(), GL_DEPTH_TEST))
	{
		gl_state.depth_func(comparison_op(rsx::method_registers.depth_func()));
	}

	if (glDepthBoundsEXT && (gl_state.enable(rsx::method_registers.depth_bounds_test_enabled(), GL_DEPTH_BOUNDS_TEST_EXT)))
	{
		gl_state.depth_bounds(rsx::method_registers.depth_bounds_min(), rsx::method_registers.depth_bounds_max());
	}

	gl_state.enable(rsx::method_registers.dither_enabled(), GL_DITHER);

	if (gl_state.enable(rsx::method_registers.stencil_test_enabled(), GL_STENCIL_TEST))
	{
		glStencilFunc(comparison_op(rsx::method_registers.stencil_func()),
			rsx::method_registers.stencil_func_ref(),
			rsx::method_registers.stencil_func_mask());

		glStencilOp(stencil_op(rsx::method_registers.stencil_op_fail()), stencil_op(rsx::method_registers.stencil_op_zfail()),
			stencil_op(rsx::method_registers.stencil_op_zpass()));

		if (rsx::method_registers.two_sided_stencil_test_enabled())
		{
			glStencilMaskSeparate(GL_BACK, rsx::method_registers.back_stencil_mask());

			glStencilFuncSeparate(GL_BACK, comparison_op(rsx::method_registers.back_stencil_func()),
				rsx::method_registers.back_stencil_func_ref(), rsx::method_registers.back_stencil_func_mask());

			glStencilOpSeparate(GL_BACK, stencil_op(rsx::method_registers.back_stencil_op_fail()),
				stencil_op(rsx::method_registers.back_stencil_op_zfail()), stencil_op(rsx::method_registers.back_stencil_op_zpass()));
		}
	}

	bool mrt_blend_enabled[] =
	{
		rsx::method_registers.blend_enabled(),
		rsx::method_registers.blend_enabled_surface_1(),
		rsx::method_registers.blend_enabled_surface_2(),
		rsx::method_registers.blend_enabled_surface_3()
	};

	if (mrt_blend_enabled[0] || mrt_blend_enabled[1] || mrt_blend_enabled[2] || mrt_blend_enabled[3])
	{
		glBlendFuncSeparate(blend_factor(rsx::method_registers.blend_func_sfactor_rgb()),
			blend_factor(rsx::method_registers.blend_func_dfactor_rgb()),
			blend_factor(rsx::method_registers.blend_func_sfactor_a()),
			blend_factor(rsx::method_registers.blend_func_dfactor_a()));

		auto blend_colors = rsx::get_constant_blend_colors();
		glBlendColor(blend_colors[0], blend_colors[1], blend_colors[2], blend_colors[3]);

		glBlendEquationSeparate(blend_equation(rsx::method_registers.blend_equation_rgb()),
			blend_equation(rsx::method_registers.blend_equation_a()));
	}

	gl_state.enablei(mrt_blend_enabled[0], GL_BLEND, 0);
	gl_state.enablei(mrt_blend_enabled[1], GL_BLEND, 1);
	gl_state.enablei(mrt_blend_enabled[2], GL_BLEND, 2);
	gl_state.enablei(mrt_blend_enabled[3], GL_BLEND, 3);

	if (gl_state.enable(rsx::method_registers.logic_op_enabled(), GL_COLOR_LOGIC_OP))
	{
		gl_state.logic_op(logic_op(rsx::method_registers.logic_operation()));
	}

	gl_state.line_width(rsx::method_registers.line_width());
	gl_state.enable(rsx::method_registers.line_smooth_enabled(), GL_LINE_SMOOTH);

	gl_state.enable(rsx::method_registers.poly_offset_point_enabled(), GL_POLYGON_OFFSET_POINT);
	gl_state.enable(rsx::method_registers.poly_offset_line_enabled(), GL_POLYGON_OFFSET_LINE);
	gl_state.enable(rsx::method_registers.poly_offset_fill_enabled(), GL_POLYGON_OFFSET_FILL);

	// offset_bias is the constant factor, multiplied by the implementation factor R
	// offset_scale is the slope factor, multiplied by the triangle slope factor M
	gl_state.polygon_offset(rsx::method_registers.poly_offset_scale(), rsx::method_registers.poly_offset_bias());

	if (gl_state.enable(rsx::method_registers.cull_face_enabled(), GL_CULL_FACE))
	{
		gl_state.cull_face(cull_face(rsx::method_registers.cull_face_mode()));
	}

	gl_state.front_face(front_face(rsx::method_registers.front_face_mode()));

	// Sample control
	// TODO: MinSampleShading
	//gl_state.enable(rsx::method_registers.msaa_enabled(), GL_MULTISAMPLE);
	//gl_state.enable(rsx::method_registers.msaa_alpha_to_coverage_enabled(), GL_SAMPLE_ALPHA_TO_COVERAGE);
	//gl_state.enable(rsx::method_registers.msaa_alpha_to_one_enabled(), GL_SAMPLE_ALPHA_TO_ONE);

	// TODO
	//NV4097_SET_ANISO_SPREAD
	//NV4097_SET_SPECULAR_ENABLE
	//NV4097_SET_TWO_SIDE_LIGHT_EN
	//NV4097_SET_FLAT_SHADE_OP
	//NV4097_SET_EDGE_FLAG

	//NV4097_SET_COLOR_KEY_COLOR
	//NV4097_SET_SHADER_CONTROL
	//NV4097_SET_ZMIN_MAX_CONTROL
	//NV4097_SET_ANTI_ALIASING_CONTROL
	//NV4097_SET_CLIP_ID_TEST_ENABLE

	m_frame_stats.setup_time += m_profiler.duration();
}

void GLGSRender::flip(const rsx::display_flip_info_t& info)
{
	if (info.skip_frame)
	{
		m_frame->flip(m_context, true);
		rsx::thread::flip(info);
		return;
	}

	u32 buffer_width = display_buffers[info.buffer].width;
	u32 buffer_height = display_buffers[info.buffer].height;
	u32 buffer_pitch = display_buffers[info.buffer].pitch;

	u32 av_format;
	const auto avconfig = g_fxo->get<rsx::avconf>();

	if (avconfig->state)
	{
		av_format = avconfig->get_compatible_gcm_format();
		if (!buffer_pitch)
			buffer_pitch = buffer_width * avconfig->get_bpp();

		buffer_width = std::min(buffer_width, avconfig->resolution_x);
		buffer_height = std::min(buffer_height, avconfig->resolution_y);
	}
	else
	{
		av_format = CELL_GCM_TEXTURE_A8R8G8B8;
		if (!buffer_pitch)
			buffer_pitch = buffer_width * 4;
	}

	// Disable scissor test (affects blit, clear, etc)
	gl_state.enable(GL_FALSE, GL_SCISSOR_TEST);

	// Clear the window background to black
	gl_state.clear_color(0, 0, 0, 0);
	gl::screen.bind();
	gl::screen.clear(gl::buffers::color);

	if ((u32)info.buffer < display_buffers_count && buffer_width && buffer_height)
	{
		// Calculate blit coordinates
		coordi aspect_ratio;
		sizei csize(m_frame->client_width(), m_frame->client_height());
		sizei new_size = csize;

		if (!g_cfg.video.stretch_to_display_area)
		{
			const double aq = (double)buffer_width / buffer_height;
			const double rq = (double)new_size.width / new_size.height;
			const double q = aq / rq;

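			// q > 1: source is wider than the window, so letterbox (shrink height);
			// q < 1: source is taller, so pillarbox (shrink width).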
if (q > 1.0)
|
|
{
|
|
new_size.height = int(new_size.height / q);
|
|
aspect_ratio.y = (csize.height - new_size.height) / 2;
|
|
}
|
|
else if (q < 1.0)
|
|
{
|
|
new_size.width = int(new_size.width * q);
|
|
aspect_ratio.x = (csize.width - new_size.width) / 2;
|
|
}
|
|
}
|
|
|
|
aspect_ratio.size = new_size;
|
|
|
|
// Find the source image
|
|
const u32 absolute_address = rsx::get_address(display_buffers[info.buffer].offset, CELL_GCM_LOCATION_LOCAL);
|
|
GLuint image = GL_NONE;
|
|
|
|
if (auto render_target_texture = m_rtts.get_color_surface_at(absolute_address))
|
|
{
|
|
if (render_target_texture->last_use_tag == m_rtts.write_tag)
|
|
{
|
|
image = render_target_texture->raw_handle();
|
|
}
|
|
else
|
|
{
|
|
gl::command_context cmd = { gl_state };
|
|
const auto overlap_info = m_rtts.get_merged_texture_memory_region(cmd, absolute_address, buffer_width, buffer_height, buffer_pitch, render_target_texture->get_bpp(), rsx::surface_access::read);
|
|
|
|
if (!overlap_info.empty() && overlap_info.back().surface == render_target_texture)
|
|
{
|
|
// Confirmed to be the newest data source in that range
|
|
image = render_target_texture->raw_handle();
|
|
}
|
|
}
|
|
|
|
if (image)
|
|
{
|
|
buffer_width = rsx::apply_resolution_scale(buffer_width, true);
|
|
buffer_height = rsx::apply_resolution_scale(buffer_height, true);
|
|
|
|
if (buffer_width > render_target_texture->width() ||
|
|
buffer_height > render_target_texture->height())
|
|
{
|
|
// TODO: Should emit only once to avoid flooding the log file
|
|
// TODO: Take AA scaling into account
|
|
LOG_WARNING(RSX, "Selected output image does not satisfy the video configuration. Display buffer resolution=%dx%d, avconf resolution=%dx%d, surface=%dx%d",
|
|
display_buffers[info.buffer].width, display_buffers[info.buffer].height, avconfig->state * avconfig->resolution_x, avconfig->state * avconfig->resolution_y,
|
|
render_target_texture->get_surface_width(rsx::surface_metrics::pixels), render_target_texture->get_surface_height(rsx::surface_metrics::pixels));
|
|
|
|
buffer_width = render_target_texture->width();
|
|
buffer_height = render_target_texture->height();
|
|
}
|
|
}
|
|
}
|
|
else if (auto surface = m_gl_texture_cache.find_texture_from_dimensions<true>(absolute_address, av_format, buffer_width, buffer_height))
|
|
{
|
|
//Hack - this should be the first location to check for output
|
|
//The render might have been done offscreen or in software and a blit used to display
|
|
if (const auto tex = surface->get_raw_texture(); tex) image = tex->id();
|
|
}

		if (!image)
		{
			LOG_WARNING(RSX, "Flip texture was not found in cache. Uploading surface from CPU");

			gl::pixel_unpack_settings unpack_settings;
			unpack_settings.alignment(1).row_length(buffer_pitch / 4);

			if (!m_flip_tex_color || m_flip_tex_color->size2D() != sizei{ (int)buffer_width, (int)buffer_height })
			{
				m_flip_tex_color = std::make_unique<gl::texture>(GL_TEXTURE_2D, buffer_width, buffer_height, 1, 1, GL_RGBA8);
			}

			gl::command_context cmd{ gl_state };
			const auto range = utils::address_range::start_length(absolute_address, buffer_pitch * buffer_height);
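			// Write back any pending GPU-side data overlapping this range first so
			// the CPU copy below reads up-to-date guest memory.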
			m_gl_texture_cache.invalidate_range(cmd, range, rsx::invalidation_cause::read);

			m_flip_tex_color->copy_from(vm::base(absolute_address), gl::texture::format::bgra, gl::texture::type::uint_8_8_8_8, unpack_settings);
			image = m_flip_tex_color->id();
		}

		if (m_frame->screenshot_toggle)
		{
			m_frame->screenshot_toggle = false;

			std::vector<u8> sshot_frame(buffer_height * buffer_width * 4);

			gl::pixel_pack_settings pack_settings{};
			pack_settings.apply();
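
			// Read the pixels back in BGRA order; prefer the DSA entry point when
			// ARB_direct_state_access is available, else fall back to the EXT path.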
			if (gl::get_driver_caps().ARB_dsa_supported)
				glGetTextureImage(image, 0, GL_BGRA, GL_UNSIGNED_BYTE, buffer_height * buffer_width * 4, sshot_frame.data());
			else
				glGetTextureImageEXT(image, GL_TEXTURE_2D, 0, GL_BGRA, GL_UNSIGNED_BYTE, sshot_frame.data());

			if (GLenum err; (err = glGetError()) != GL_NO_ERROR)
				LOG_ERROR(GENERAL, "[Screenshot] Failed to capture image: 0x%x", err);
			else
				m_frame->take_screenshot(std::move(sshot_frame), buffer_width, buffer_height);
		}

		areai screen_area = coordi({}, { (int)buffer_width, (int)buffer_height });
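
		// Present the image. A plain framebuffer blit suffices when no gamma or
		// range correction is needed; otherwise run the video output shader pass.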
		if (g_cfg.video.full_rgb_range_output && avconfig->gamma == 1.f)
		{
			// Blit source image to the screen
			m_flip_fbo.recreate();
			m_flip_fbo.bind();
			m_flip_fbo.color = image;
			m_flip_fbo.read_buffer(m_flip_fbo.color);
			m_flip_fbo.draw_buffer(m_flip_fbo.color);
			m_flip_fbo.blit(gl::screen, screen_area, areai(aspect_ratio).flipped_vertical(), gl::buffers::color, gl::filter::linear);
		}
		else
		{
			const f32 gamma = avconfig->gamma;
			const bool limited_range = !g_cfg.video.full_rgb_range_output;

			gl::screen.bind();
			glViewport(0, 0, m_frame->client_width(), m_frame->client_height());
			m_video_output_pass.run(m_frame->client_width(), m_frame->client_height(), image, areai(aspect_ratio), gamma, limited_range);
		}
	}

	if (m_overlay_manager)
	{
		if (m_overlay_manager->has_dirty())
		{
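			// Collect the dirty overlay UIDs under the lock, but defer dispose()
			// until after unlocking (disposal may need to re-enter the manager).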
			m_overlay_manager->lock();

			std::vector<u32> uids_to_dispose;
			uids_to_dispose.reserve(m_overlay_manager->get_dirty().size());

			for (const auto& view : m_overlay_manager->get_dirty())
			{
				m_ui_renderer.remove_temp_resources(view->uid);
				uids_to_dispose.push_back(view->uid);
			}

			m_overlay_manager->unlock();
			m_overlay_manager->dispose(uids_to_dispose);
		}

		if (m_overlay_manager->has_visible())
		{
			gl::screen.bind();
			glViewport(0, 0, m_frame->client_width(), m_frame->client_height());

			// Lock to avoid modification during run-update chain
			std::lock_guard lock(*m_overlay_manager);

			for (const auto& view : m_overlay_manager->get_views())
			{
				m_ui_renderer.run(m_frame->client_width(), m_frame->client_height(), 0, *view.get());
			}
		}
	}

	if (g_cfg.video.overlay)
	{
		gl::screen.bind();
		glViewport(0, 0, m_frame->client_width(), m_frame->client_height());

		m_text_printer.print_text(0, 0, m_frame->client_width(), m_frame->client_height(), fmt::format("RSX Load: %3d%%", get_load()));
		m_text_printer.print_text(0, 18, m_frame->client_width(), m_frame->client_height(), fmt::format("draw calls: %16d", info.stats.draw_calls));
		m_text_printer.print_text(0, 36, m_frame->client_width(), m_frame->client_height(), fmt::format("draw call setup: %11dus", info.stats.setup_time));
		m_text_printer.print_text(0, 54, m_frame->client_width(), m_frame->client_height(), fmt::format("vertex upload time: %8dus", info.stats.vertex_upload_time));
		m_text_printer.print_text(0, 72, m_frame->client_width(), m_frame->client_height(), fmt::format("textures upload time: %6dus", info.stats.textures_upload_time));
		m_text_printer.print_text(0, 90, m_frame->client_width(), m_frame->client_height(), fmt::format("draw call execution: %7dus", info.stats.draw_exec_time));

		const auto num_dirty_textures = m_gl_texture_cache.get_unreleased_textures_count();
		const auto texture_memory_size = m_gl_texture_cache.get_texture_memory_in_use() / (1024 * 1024);
		const auto num_flushes = m_gl_texture_cache.get_num_flush_requests();
		const auto num_mispredict = m_gl_texture_cache.get_num_cache_mispredictions();
		const auto num_speculate = m_gl_texture_cache.get_num_cache_speculative_writes();
		const auto num_misses = m_gl_texture_cache.get_num_cache_misses();
		const auto num_unavoidable = m_gl_texture_cache.get_num_unavoidable_hard_faults();
		const auto cache_miss_ratio = (u32)ceil(m_gl_texture_cache.get_cache_miss_ratio() * 100);
		m_text_printer.print_text(0, 126, m_frame->client_width(), m_frame->client_height(), fmt::format("Unreleased textures: %7d", num_dirty_textures));
		m_text_printer.print_text(0, 144, m_frame->client_width(), m_frame->client_height(), fmt::format("Texture memory: %12dM", texture_memory_size));
		m_text_printer.print_text(0, 162, m_frame->client_width(), m_frame->client_height(), fmt::format("Flush requests: %12d = %2d (%3d%%) hard faults, %2d unavoidable, %2d misprediction(s), %2d speculation(s)", num_flushes, num_misses, cache_miss_ratio, num_unavoidable, num_mispredict, num_speculate));
	}

	m_frame->flip(m_context);
	rsx::thread::flip(info);

	// Cleanup
	m_gl_texture_cache.on_frame_end();
	m_vertex_cache->purge();

	auto removed_textures = m_rtts.free_invalidated();
	m_framebuffer_cache.remove_if([&](auto& fbo)
	{
		if (fbo.unused_check_count() >= 2) return true; // Remove if stale
		if (fbo.references_any(removed_textures)) return true; // Remove if any of the attachments is invalid

		return false;
	});

	if (m_draw_fbo && !m_rtts_dirty)
	{
		// Always restore the active framebuffer
		m_draw_fbo->bind();
		set_viewport();
		set_scissor(!!(m_graphics_state & rsx::pipeline_state::scissor_setup_clipped));
	}
}

bool GLGSRender::on_access_violation(u32 address, bool is_writing)
{
	const bool can_flush = (std::this_thread::get_id() == m_rsx_thread);
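	// Only the RSX thread owns the GL context, so a flush can only happen inline
	// on that thread; any other thread must defer the invalidation.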
	const rsx::invalidation_cause cause =
		is_writing ? (can_flush ? rsx::invalidation_cause::write : rsx::invalidation_cause::deferred_write)
		           : (can_flush ? rsx::invalidation_cause::read : rsx::invalidation_cause::deferred_read);

	auto cmd = can_flush ? gl::command_context{ gl_state } : gl::command_context{};
	auto result = m_gl_texture_cache.invalidate_address(cmd, address, cause);

	if (!result.violation_handled)
		return false;

	{
		std::lock_guard lock(m_sampler_mutex);
		m_samplers_dirty.store(true);
	}

	if (result.num_flushable > 0)
	{
		work_item &task = post_flush_request(address, result);
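
		// Hand the flush to the RSX thread and block until it is serviced in
		// do_local_task(); release the vm lock first so that thread is not
		// blocked on it in the meantime.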
		vm::temporary_unlock();
		task.producer_wait();
	}

	return true;
}

void GLGSRender::on_invalidate_memory_range(const utils::address_range &range, rsx::invalidation_cause cause)
{
	gl::command_context cmd{ gl_state };
	auto data = m_gl_texture_cache.invalidate_range(cmd, range, cause);
	AUDIT(data.empty());

	if (cause == rsx::invalidation_cause::unmap && data.violation_handled)
	{
		m_gl_texture_cache.purge_unreleased_sections();
		{
			std::lock_guard lock(m_sampler_mutex);
			m_samplers_dirty.store(true);
		}
	}
}

void GLGSRender::on_semaphore_acquire_wait()
{
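	// The FIFO is stalled on a semaphore; keep servicing deferred flush requests
	// and emulator-requested flips so the waiting threads can make progress.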
	if (!work_queue.empty() ||
		(async_flip_requested & flip_request::emu_requested))
	{
		do_local_task(rsx::FIFO_state::lock_wait);
	}
}

void GLGSRender::do_local_task(rsx::FIFO_state state)
{
	if (!work_queue.empty())
	{
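		// Service flush requests posted from other threads (see on_access_violation):
		// drop items whose results have already been consumed, then flush the rest.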
		std::lock_guard lock(queue_guard);

		work_queue.remove_if([](work_item &q) { return q.received; });

		for (work_item& q : work_queue)
		{
			if (q.processed) continue;

			gl::command_context cmd{ gl_state };
			q.result = m_gl_texture_cache.flush_all(cmd, q.section_data);
			q.processed = true;
		}
	}
	else if (!in_begin_end && state != rsx::FIFO_state::lock_wait)
	{
		// This will re-engage locks and break the texture cache if another thread is waiting in the access violation handler!
		// Only call when there are no waiters
		if (m_graphics_state & rsx::pipeline_state::framebuffer_reads_dirty)
		{
			m_gl_texture_cache.do_update();
			m_graphics_state &= ~rsx::pipeline_state::framebuffer_reads_dirty;
		}
	}

	rsx::thread::do_local_task(state);

	if (state == rsx::FIFO_state::lock_wait)
	{
		// Critical check finished
		return;
	}

	if (m_overlay_manager)
	{
		if (!in_begin_end && (async_flip_requested & flip_request::native_ui))
		{
			rsx::display_flip_info_t info{};
			info.buffer = current_display_buffer;
			flip(info);
		}
	}
}

work_item& GLGSRender::post_flush_request(u32 address, gl::texture_cache::thrashed_set& flush_data)
{
	std::lock_guard lock(queue_guard);

	work_item &result = work_queue.emplace_back();
	result.address_to_flush = address;
	result.section_data = std::move(flush_data);
	return result;
}

bool GLGSRender::scaled_image_from_memory(rsx::blit_src_info& src, rsx::blit_dst_info& dst, bool interpolate)
{
	gl::command_context cmd{ gl_state };
	if (m_gl_texture_cache.blit(cmd, src, dst, interpolate, m_rtts))
	{
		m_samplers_dirty.store(true);
		return true;
	}

	return false;
}

void GLGSRender::notify_tile_unbound(u32 tile)
{
	//TODO: Handle texture writeback
	//u32 addr = rsx::get_address(tiles[tile].offset, tiles[tile].location);
	//on_notify_memory_unmapped(addr, tiles[tile].size);
	//m_rtts.invalidate_surface_address(addr, false);

	{
		std::lock_guard lock(m_sampler_mutex);
		m_samplers_dirty.store(true);
	}
}

void GLGSRender::begin_occlusion_query(rsx::reports::occlusion_query_info* query)
{
	query->result = 0;
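	// GL_ANY_SAMPLES_PASSED is a boolean-style query (did any samples pass?), not
	// an exact sample count; results are accumulated in get_occlusion_query_result().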
	glBeginQuery(GL_ANY_SAMPLES_PASSED, (GLuint)query->driver_handle);
}

void GLGSRender::end_occlusion_query(rsx::reports::occlusion_query_info* query)
{
	verify(HERE), query->active;
	glEndQuery(GL_ANY_SAMPLES_PASSED);
}

bool GLGSRender::check_occlusion_query_status(rsx::reports::occlusion_query_info* query)
{
	if (!query->num_draws)
		return true;

	GLint status = GL_TRUE;
	glGetQueryObjectiv((GLuint)query->driver_handle, GL_QUERY_RESULT_AVAILABLE, &status);

	return status != GL_FALSE;
}

void GLGSRender::get_occlusion_query_result(rsx::reports::occlusion_query_info* query)
{
	if (query->num_draws)
	{
		GLint result = 0;
		glGetQueryObjectiv((GLuint)query->driver_handle, GL_QUERY_RESULT, &result);

		query->result += result;
	}
}

void GLGSRender::discard_occlusion_query(rsx::reports::occlusion_query_info* query)
{
	if (query->active)
	{
		// Discard was called on an active query; close it first
		glEndQuery(GL_ANY_SAMPLES_PASSED);
	}
}

void GLGSRender::on_decompiler_init()
{
	// Bind decompiler context to this thread
	m_frame->set_current(m_decompiler_context);
}

void GLGSRender::on_decompiler_exit()
{
	// Cleanup
	m_frame->delete_context(m_decompiler_context);
}

bool GLGSRender::on_decompiler_task()
{
	const auto result = m_prog_buffer.async_update(8);
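	// async_update() drains a bounded batch of queued program recompilations (the
	// argument presumably caps entries per call); result.second reports that
	// programs changed and the contexts must synchronize before reuse.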
	if (result.second)
	{
		// TODO: Proper synchronization with renderer
		// Finish works well enough for now but it is not a proper solution
		glFinish();
	}

	return result.first;
}