#pragma once

#include <deque>
#include <variant>
#include <stack>

#include "GCM.h"
#include "rsx_cache.h"
#include "RSXFIFO.h"
#include "RSXTexture.h"
#include "RSXOffload.h"
#include "RSXVertexProgram.h"
#include "RSXFragmentProgram.h"
#include "rsx_methods.h"
#include "rsx_utils.h"
#include "Overlays/overlays.h"
#include "Common/texture_cache_utils.h"

#include "Utilities/Thread.h"
#include "Utilities/geometry.h"
#include "Capture/rsx_trace.h"
#include "Capture/rsx_replay.h"

#include "Emu/Cell/lv2/sys_rsx.h"
#include "Emu/IdManager.h"

extern u64 get_guest_system_time();
extern u64 get_system_time();

extern bool user_asked_for_frame_capture;
extern bool capture_current_frame;
extern rsx::frame_trace_data frame_debug;
extern rsx::frame_capture_data frame_capture;

namespace rsx
{
    struct rsx_iomap_table
    {
        std::array<atomic_t<u32>, 4096> ea;
        std::array<atomic_t<u32>, 4096> io;

        rsx_iomap_table() noexcept
        {
            std::fill(ea.begin(), ea.end(), -1);
            std::fill(io.begin(), io.end(), -1);
        }

        // Try to get the real address given a mapped address
        // Returns -1 on failure
        u32 get_addr(u32 offs) const noexcept
        {
            return this->ea[offs >> 20] | (offs & 0xFFFFF);
        }
    };
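
    // Illustrative sketch (not part of the original header): the top 12 bits of
    // an io offset select a 1 MB page in 'ea'. Assuming page 2 was previously
    // mapped to effective address 0x30200000:
    //
    //   rsx_iomap_table table;
    //   table.ea[2] = 0x30200000;                    // hypothetical mapping
    //   const u32 addr = table.get_addr(0x00201234); // == 0x30201234
    //
    // With the page unmapped (entry == -1), the OR yields 0xFFFFFFFF, i.e. -1.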

    enum framebuffer_creation_context : u8
    {
        context_draw = 0,
        context_clear_color = 1,
        context_clear_depth = 2,
        context_clear_all = context_clear_color | context_clear_depth
    };

    enum pipeline_state : u32
    {
        fragment_program_dirty = 0x1,        // Fragment program changed
        vertex_program_dirty = 0x2,          // Vertex program changed
        fragment_state_dirty = 0x4,          // Fragment state changed (alpha test, etc)
        vertex_state_dirty = 0x8,            // Vertex state changed (scale_offset, clip planes, etc)
        transform_constants_dirty = 0x10,    // Transform constants changed
        fragment_constants_dirty = 0x20,     // Fragment constants changed
        framebuffer_reads_dirty = 0x40,      // Framebuffer contents changed
        fragment_texture_state_dirty = 0x80, // Fragment texture parameters changed
        vertex_texture_state_dirty = 0x100,  // Vertex texture parameters changed
        scissor_config_state_dirty = 0x200,  // Scissor region changed

        scissor_setup_invalid = 0x400,       // Scissor configuration is broken
        scissor_setup_clipped = 0x800,       // Scissor region is cropped by viewport constraint

        invalidate_pipeline_bits = fragment_program_dirty | vertex_program_dirty,
        memory_barrier_bits = framebuffer_reads_dirty,
        all_dirty = ~0u
    };
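
    // Usage sketch (illustrative): dirty state is tracked as a bitfield, e.g.
    // in rsx::thread::m_graphics_state declared further below. A register write
    // marks the affected stage dirty:
    //
    //   m_graphics_state |= rsx::pipeline_state::fragment_program_dirty;
    //
    // and the renderer clears the bits once it has revalidated that state:
    //
    //   if (m_graphics_state & rsx::pipeline_state::invalidate_pipeline_bits)
    //   {
    //       // relink shader programs, then remove the handled dirty bits
    //   }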

    enum FIFO_state : u8
    {
        running = 0,
        empty = 1,    // PUT == GET
        spinning = 2, // Puller continuously jumps to self addr (synchronization technique)
        nop = 3,      // Puller is processing a NOP command
        lock_wait = 4 // Puller is processing a lock acquire
    };

    enum FIFO_hint : u8
    {
        hint_conditional_render_eval = 1,
        hint_zcull_sync = 2
    };

    enum result_flags : u8
    {
        result_none = 0,
        result_error = 1,
        result_zcull_intr = 2
    };

    u32 get_vertex_type_size_on_host(vertex_base_type type, u32 size);

    // TODO: Replace with std::source_location in c++20
    u32 get_address(u32 offset, u32 location, const char* from);

    struct tiled_region
    {
        u32 address;
        u32 base;
        GcmTileInfo *tile;
        u8 *ptr;

        void write(const void *src, u32 width, u32 height, u32 pitch);
        void read(void *dst, u32 width, u32 height, u32 pitch);
    };

    struct vertex_array_buffer
    {
        rsx::vertex_base_type type;
        u8 attribute_size;
        u8 stride;
        gsl::span<const std::byte> data;
        u8 index;
        bool is_be;
    };

    struct vertex_array_register
    {
        rsx::vertex_base_type type;
        u8 attribute_size;
        std::array<u32, 4> data;
        u8 index;
    };

    struct empty_vertex_array
    {
        u8 index;
    };

    struct draw_array_command
    {
        u32 __dummy;
    };

    struct draw_indexed_array_command
    {
        gsl::span<const std::byte> raw_index_buffer;
    };

    struct draw_inlined_array
    {
        u32 __dummy;
        u32 __dummy2;
    };

    struct interleaved_attribute_t
    {
        u8 index;
        bool modulo;
        u16 frequency;
    };

    struct interleaved_range_info
    {
        bool interleaved = false;
        bool single_vertex = false;
        u32 base_offset = 0;
        u32 real_offset_address = 0;
        u8 memory_location = 0;
        u8 attribute_stride = 0;

        rsx::simple_array<interleaved_attribute_t> locations;

        // Check if we need to upload a full unoptimized range, i.e. [0, max_index]
        std::pair<u32, u32> calculate_required_range(u32 first, u32 count) const
        {
            if (single_vertex)
            {
                return { 0, 1 };
            }

            const u32 max_index = (first + count) - 1;
            u32 _max_index = 0;
            u32 _min_index = first;

            for (const auto &attrib : locations)
            {
                if (attrib.frequency <= 1) [[likely]]
                {
                    _max_index = max_index;
                }
                else
                {
                    if (attrib.modulo)
                    {
                        if (max_index >= attrib.frequency)
                        {
                            // Actually uses the modulo operator, cannot safely optimize
                            _min_index = 0;
                            _max_index = std::max<u32>(_max_index, attrib.frequency - 1);
                        }
                        else
                        {
                            // Same as having no modulo
                            _max_index = max_index;
                        }
                    }
                    else
                    {
                        // Division operator
                        _min_index = std::min(_min_index, first / attrib.frequency);
                        _max_index = std::max<u32>(_max_index, max_index / attrib.frequency);
                    }
                }
            }

            verify(HERE), _max_index >= _min_index;
            return { _min_index, (_max_index - _min_index) + 1 };
        }
    };
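
    // Worked example (illustrative): for a draw with first = 0, count = 100 and
    // a single attribute using the frequency divider (modulo = false,
    // frequency = 4), vertex i reads element i / 4, so only { 0, 25 } is
    // required instead of { 0, 100 }. With modulo = true and frequency = 4,
    // indices wrap as i % 4 and the result collapses to { 0, 4 }.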

    enum attribute_buffer_placement : u8
    {
        none = 0,
        persistent = 1,
        transient = 2
    };

    struct vertex_input_layout
    {
        std::vector<interleaved_range_info> interleaved_blocks; // Interleaved blocks to be uploaded as-is
        std::vector<std::pair<u8, u32>> volatile_blocks;        // Volatile data blocks (immediate draw vertex data for example)
        rsx::simple_array<u8> referenced_registers;             // Volatile register data

        std::array<attribute_buffer_placement, 16> attribute_placement;

        vertex_input_layout()
        {
            attribute_placement.fill(attribute_buffer_placement::none);
        }

        void clear()
        {
            interleaved_blocks.clear();
            volatile_blocks.clear();
            referenced_registers.clear();
        }

        bool validate() const
        {
            // Criteria: At least one array stream has to be defined to feed vertex positions.
            // This stream cannot be a const register, as identical vertices would only form a zero-area primitive.

            if (!interleaved_blocks.empty() && interleaved_blocks.front().attribute_stride != 0)
                return true;

            if (!volatile_blocks.empty())
                return true;

            for (u8 index = 0; index < limits::vertex_count; ++index)
            {
                switch (attribute_placement[index])
                {
                case attribute_buffer_placement::transient:
                {
                    // Ignore register reference
                    if (std::find(referenced_registers.begin(), referenced_registers.end(), index) != referenced_registers.end())
                        continue;

                    // The source is an inline array or immediate draw push buffer
                    return true;
                }
                case attribute_buffer_placement::persistent:
                {
                    return true;
                }
                case attribute_buffer_placement::none:
                {
                    continue;
                }
                default:
                {
                    fmt::throw_exception("Unreachable" HERE);
                }
                }
            }

            return false;
        }

        u32 calculate_interleaved_memory_requirements(u32 first_vertex, u32 vertex_count) const
        {
            u32 mem = 0;
            for (auto &block : interleaved_blocks)
            {
                const auto range = block.calculate_required_range(first_vertex, vertex_count);
                mem += range.second * block.attribute_stride;
            }

            return mem;
        }
    };
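
    // Sketch (illustrative): the interleaved upload size is the sum over all
    // blocks of required_element_count * attribute_stride. A single block with
    // a 16-byte stride drawing vertices [0, 100) therefore needs
    // 100 * 16 = 1600 bytes of persistent storage.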

    struct framebuffer_layout
    {
        u16 width;
        u16 height;
        std::array<u32, 4> color_addresses;
        std::array<u32, 4> color_pitch;
        std::array<u32, 4> actual_color_pitch;
        std::array<bool, 4> color_write_enabled;
        u32 zeta_address;
        u32 zeta_pitch;
        u32 actual_zeta_pitch;
        bool zeta_write_enabled;
        rsx::surface_target target;
        rsx::surface_color_format color_format;
        rsx::surface_depth_format depth_format;
        rsx::surface_antialiasing aa_mode;
        u32 aa_factors[2];
        bool depth_float;
        bool ignore_change;
    };

    namespace reports
    {
        struct occlusion_query_info
        {
            u32 driver_handle;
            u32 result;
            u32 num_draws;
            u32 data_type;
            u64 sync_tag;
            u64 timestamp;
            bool pending;
            bool active;
            bool owned;
        };

        struct queued_report_write
        {
            u32 type = CELL_GCM_ZPASS_PIXEL_CNT;
            u32 counter_tag;
            occlusion_query_info* query;
            queued_report_write* forwarder;

            vm::addr_t sink;                    // Memory location of the report
            std::vector<vm::addr_t> sink_alias; // Aliased memory addresses
        };

        struct query_search_result
        {
            bool found;
            u32 raw_zpass_result;
            std::vector<occlusion_query_info*> queries;
        };

        enum sync_control
        {
            sync_none = 0,
            sync_defer_copy = 1, // If set, return a zcull intr code instead of forcefully reading zcull data
            sync_no_notify = 2   // If set, backend hint notifications will not be made
        };
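
        // Usage note (illustrative): these values are passed as the 'flags'
        // argument of ZCULL_control::read_barrier() below. sync_defer_copy asks
        // the unit to return result_zcull_intr rather than stalling to copy the
        // report data inline; sync_no_notify suppresses backend sync hints.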

        class ZCULL_control
        {
        protected:
            // Delay before a report update operation is forced to retire
            const u32 max_zcull_delay_us = 300;
            const u32 min_zcull_tick_us = 100;

            // Number of occlusion query slots available. Real hardware actually has far fewer units before choking
            const u32 occlusion_query_count = 1024;
            const u32 max_safe_queue_depth = 892;

            bool unit_enabled = false;        // The ZCULL unit is on
            bool write_enabled = false;       // A surface in the ZCULL-monitored tile region has been loaded for rasterization
            bool stats_enabled = false;       // Collecting of ZCULL statistics is enabled (not the same as pixels passing the Z test!)
            bool zpass_count_enabled = false; // Collecting of ZPASS statistics is enabled. If this is off, the counter does not increment
            bool host_queries_active = false; // The backend/host is gathering Z data for the ZCULL unit

            std::array<occlusion_query_info, 1024> m_occlusion_query_data = {};
            std::stack<occlusion_query_info*> m_free_occlusion_pool;

            occlusion_query_info* m_current_task = nullptr;
            u32 m_statistics_tag_id = 0;

            // Scheduling clock. Granularity is the min_zcull_tick_us value.
            u64 m_tsc = 0;
            u64 m_next_tsc = 0;

            // Incremental tag used for tracking sync events. Hardware clock resolution is too low for the job.
            u64 m_sync_tag = 0;
            u64 m_timer = 0;

            std::vector<queued_report_write> m_pending_writes;
            std::unordered_map<u32, u32> m_statistics_map;

            // Enables/disables the ZCULL unit
            void set_active(class ::rsx::thread* ptimer, bool active, bool flush_queue);

            // Checks current state of the unit and applies changes
            void check_state(class ::rsx::thread* ptimer, bool flush_queue);

            // Sets up a new query slot and sets it to the current task
            void allocate_new_query(class ::rsx::thread* ptimer);

            // Free a query slot in use
            void free_query(occlusion_query_info* query);

            // Write report to memory
            void write(vm::addr_t sink, u64 timestamp, u32 type, u32 value);
            void write(queued_report_write* writer, u64 timestamp, u32 value);

        public:

            ZCULL_control();
            ~ZCULL_control();

            void set_enabled(class ::rsx::thread* ptimer, bool state, bool flush_queue = false);
            void set_status(class ::rsx::thread* ptimer, bool surface_active, bool zpass_active, bool zcull_stats_active, bool flush_queue = false);

            // Read current zcull statistics into the address provided
            void read_report(class ::rsx::thread* ptimer, vm::addr_t sink, u32 type);

            // Clears current stat block and increments stat_tag_id
            void clear(class ::rsx::thread* ptimer);

            // Forcefully flushes all pending writes
            void sync(class ::rsx::thread* ptimer);

            // Conditionally sync any pending writes if range overlaps
            flags32_t read_barrier(class ::rsx::thread* ptimer, u32 memory_address, u32 memory_range, flags32_t flags);

            // Call once every 'tick' to update, optional address provided to partially sync until address is processed
            void update(class ::rsx::thread* ptimer, u32 sync_address = 0, bool hint = false);

            // Draw call notification
            void on_draw();

            // Sync hint notification
            void on_sync_hint(void* args);

            // Check for pending writes
            bool has_pending() const { return !m_pending_writes.empty(); }

            // Search for query synchronized at address
            query_search_result find_query(vm::addr_t sink_address, bool all);

            // Copies queries in range rebased from source range to destination range
            u32 copy_reports_to(u32 start, u32 range, u32 dest);

            // Backend methods (optional, will return everything as always visible by default)
            virtual void begin_occlusion_query(occlusion_query_info* /*query*/) {}
            virtual void end_occlusion_query(occlusion_query_info* /*query*/) {}
            virtual bool check_occlusion_query_status(occlusion_query_info* /*query*/) { return true; }
            virtual void get_occlusion_query_result(occlusion_query_info* query) { query->result = UINT32_MAX; }
            virtual void discard_occlusion_query(occlusion_query_info* /*query*/) {}
        };
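
        // Backend note (illustrative): the defaults above treat every query as
        // complete with all samples visible (result = UINT32_MAX). A real
        // backend would derive from ZCULL_control and override the hooks, e.g.
        // (hypothetical subclass, not part of this header):
        //
        //   class gl_zcull_control : public ZCULL_control
        //   {
        //       void begin_occlusion_query(occlusion_query_info* q) override;
        //       void end_occlusion_query(occlusion_query_info* q) override;
        //       bool check_occlusion_query_status(occlusion_query_info* q) override;
        //   };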

        // Helper class for conditional rendering
        struct conditional_render_eval
        {
            bool enabled = false;
            bool eval_failed = false;
            bool hw_cond_active = false;
            bool reserved = false;

            std::vector<occlusion_query_info*> eval_sources;
            u64 eval_sync_tag = 0;
            u32 eval_address = 0;

            // Resets common data
            void reset();

            // Returns true if rendering is disabled as per conditional render test
            bool disable_rendering() const;

            // Returns true if a conditional render is active but not yet evaluated
            bool eval_pending() const;

            // Enable conditional rendering
            void enable_conditional_render(thread* pthr, u32 address);

            // Disable conditional rendering
            void disable_conditional_render(thread* pthr);

            // Sets data sources for predicate evaluation
            void set_eval_sources(std::vector<occlusion_query_info*>& sources);

            // Sets evaluation result. Result is true if conditional evaluation failed
            void set_eval_result(thread* pthr, bool failed);

            // Evaluates the condition by accessing memory directly
            void eval_result(thread* pthr);
        };
    }

    struct frame_statistics_t
    {
        u32 draw_calls;
        s64 setup_time;
        s64 vertex_upload_time;
        s64 textures_upload_time;
        s64 draw_exec_time;
        s64 flip_time;
    };

    struct display_flip_info_t
    {
        std::deque<u32> buffer_queue;
        u32 buffer;
        bool skip_frame;
        bool emu_flip;
        bool in_progress;
        frame_statistics_t stats;

        inline void push(u32 _buffer)
        {
            buffer_queue.push_back(_buffer);
        }

        inline bool pop(u32 _buffer)
        {
            if (buffer_queue.empty())
            {
                return false;
            }

            do
            {
                const auto index = buffer_queue.front();
                buffer_queue.pop_front();

                if (index == _buffer)
                {
                    buffer = _buffer;
                    return true;
                }
            }
            while (!buffer_queue.empty());

            // Need to observe this happening in the wild
            rsx_log.error("Display queue was discarded while not empty!");
            return false;
        }
    };
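
    // Usage sketch (illustrative): flip requests are produced with push() and
    // consumed with pop(). pop(N) walks the deque instead of only checking the
    // front so that entries queued before buffer N are discarded as stale:
    //
    //   display_flip_info_t info{};
    //   info.push(1);
    //   info.push(2);
    //   info.pop(2); // consumes both entries; info.buffer == 2, returns true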

    struct backend_configuration
    {
        bool supports_multidraw;             // Draw call batching
        bool supports_hw_a2c;                // Alpha to coverage
        bool supports_hw_renormalization;    // Should be true on NV hardware which matches PS3 texture renormalization behaviour
        bool supports_hw_a2one;              // Alpha to one
        bool supports_hw_conditional_render; // Conditional render
    };

    struct sampled_image_descriptor_base;

    class thread
    {
        u64 timestamp_ctrl = 0;
        u64 timestamp_subvalue = 0;

        display_flip_info_t m_queued_flip{};

    protected:
        std::thread::id m_rsx_thread;
        atomic_t<bool> m_rsx_thread_exiting{ true };

        std::array<push_buffer_vertex_info, 16> vertex_push_buffers;
        std::vector<u32> element_push_buffer;

        s32 m_skip_frame_ctr = 0;
        bool skip_current_frame = false;
        frame_statistics_t stats{};

        backend_configuration backend_config{};

        // FIFO
        std::unique_ptr<FIFO::FIFO_control> fifo_ctrl;
        FIFO::flattening_helper m_flattener;
        u32 fifo_ret_addr = RSX_CALL_STACK_EMPTY;
        u32 saved_fifo_ret = RSX_CALL_STACK_EMPTY;

        // Occlusion query
        bool zcull_surface_active = false;
        std::unique_ptr<reports::ZCULL_control> zcull_ctrl;

        // Framebuffer setup
        rsx::gcm_framebuffer_info m_surface_info[rsx::limits::color_buffers_count];
        rsx::gcm_framebuffer_info m_depth_surface_info;
        framebuffer_layout m_framebuffer_layout;
        bool framebuffer_status_valid = false;

        // Overlays
        rsx::overlays::display_manager* m_overlay_manager = nullptr;

        // Invalidated memory range
        address_range m_invalidated_memory_range;

        // Profiler
        rsx::profiling_timer m_profiler;
        frame_statistics_t m_frame_stats;

    public:
        RsxDmaControl* ctrl = nullptr;
        rsx_iomap_table iomap_table;
        u32 restore_point = 0;
        atomic_t<u32> external_interrupt_lock{ 0 };
        atomic_t<bool> external_interrupt_ack{ false };
        void flush_fifo();
        void recover_fifo();
        static void fifo_wake_delay(u64 div = 1);
        u32 get_fifo_cmd() const;

        // Performance approximation counters
        struct
        {
            atomic_t<u64> idle_time{ 0 };  // Time spent idling in microseconds
            u64 last_update_timestamp = 0; // Timestamp of last load update
            u64 FIFO_idle_timestamp = 0;   // Timestamp of when FIFO queue becomes idle
            FIFO_state state = FIFO_state::running;
            u32 approximate_load = 0;
            u32 sampled_frames = 0;
        }
        performance_counters;

        enum class flip_request : u32
        {
            emu_requested = 1,
            native_ui = 2,

            any = emu_requested | native_ui
        };

        atomic_bitmask_t<flip_request> async_flip_requested{};
        u8 async_flip_buffer{ 0 };

        GcmTileInfo tiles[limits::tiles_count];
        GcmZcullInfo zculls[limits::zculls_count];

        void capture_frame(const std::string &name);

    public:
        std::shared_ptr<named_thread<class ppu_thread>> intr_thread;

        // I hate this flag, but until hle is closer to lle, it's needed
        bool isHLE{ false };

        u32 flip_status;
        int debug_level;

        atomic_t<bool> requested_vsync{false};
        atomic_t<bool> enable_second_vhandler{false};

        RsxDisplayInfo display_buffers[8];
        u32 display_buffers_count{0};
        u32 current_display_buffer{0};
        u32 device_addr;
        u32 label_addr;

        u32 main_mem_size{0};
        u32 local_mem_size{0};

        bool m_rtts_dirty;
        bool m_textures_dirty[16];
        bool m_vertex_textures_dirty[4];
        bool m_framebuffer_state_contested = false;
        rsx::framebuffer_creation_context m_current_framebuffer_context = rsx::framebuffer_creation_context::context_draw;

        u32 m_graphics_state = 0;
        u64 ROP_sync_timestamp = 0;

        program_hash_util::fragment_program_utils::fragment_program_metadata current_fp_metadata = {};
        program_hash_util::vertex_program_utils::vertex_program_metadata current_vp_metadata = {};

    protected:
        std::array<u32, 4> get_color_surface_addresses() const;
        u32 get_zeta_surface_address() const;

        void get_framebuffer_layout(rsx::framebuffer_creation_context context, framebuffer_layout &layout);
        bool get_scissor(areau& region, bool clip_viewport);

        /**
         * Analyze vertex inputs and group all interleaved blocks
         */
        void analyse_inputs_interleaved(vertex_input_layout&) const;

        RSXVertexProgram current_vertex_program = {};
        RSXFragmentProgram current_fragment_program = {};

        void get_current_vertex_program(const std::array<std::unique_ptr<rsx::sampled_image_descriptor_base>, rsx::limits::vertex_textures_count>& sampler_descriptors, bool skip_textures = false, bool skip_vertex_inputs = true);

        /**
         * Gets current fragment program and associated fragment state.
         * get_surface_info is a helper that takes 2 parameters: rsx_texture_address and surface_is_depth.
         * It returns whether the surface is a render target and the surface pitch in native format.
         */
        void get_current_fragment_program(const std::array<std::unique_ptr<rsx::sampled_image_descriptor_base>, rsx::limits::fragment_textures_count>& sampler_descriptors);

    public:
        double fps_limit = 59.94;

    public:
        u64 start_rsx_time = 0;
        u64 int_flip_index = 0;
        u64 last_flip_time = 0;
        vm::ptr<void(u32)> flip_handler = vm::null;
        vm::ptr<void(u32)> user_handler = vm::null;
        vm::ptr<void(u32)> vblank_handler = vm::null;
        atomic_t<u64> vblank_count{0};

    public:
        bool invalid_command_interrupt_raised = false;
        bool sync_point_request = false;
        bool in_begin_end = false;

        atomic_t<s32> async_tasks_pending{ 0 };

        bool zcull_stats_enabled = false;
        bool zcull_rendering_enabled = false;
        bool zcull_pixel_cnt_enabled = false;

        reports::conditional_render_eval cond_render_ctrl;

        void operator()();
        virtual u64 get_cycles() = 0;
        virtual ~thread();

        static constexpr auto thread_name = "rsx::thread"sv;

    protected:
        thread();
        virtual void on_task();
        virtual void on_exit();

        /**
         * Execute a backend local task queue
         */
        virtual void do_local_task(FIFO_state state);

        virtual void on_decompiler_init() {}
        virtual void on_decompiler_exit() {}
        virtual bool on_decompiler_task() { return false; }

        virtual void emit_geometry(u32) {}

        void run_FIFO();

    public:
        virtual void clear_surface(u32 /*arg*/) {}
        virtual void begin();
        virtual void end();
        virtual void execute_nop_draw();

        virtual void on_init_rsx() = 0;
        virtual void on_init_thread() = 0;
        virtual void on_frame_end(u32 buffer, bool forced = false);
        virtual void flip(const display_flip_info_t& info) = 0;
        virtual u64 timestamp();
        virtual bool on_access_violation(u32 /*address*/, bool /*is_writing*/) { return false; }
        virtual void on_invalidate_memory_range(const address_range & /*range*/, rsx::invalidation_cause) {}
        virtual void notify_tile_unbound(u32 /*tile*/) {}

        // control
        virtual void renderctl(u32 /*request_code*/, void* /*args*/) {}

        // zcull
        void notify_zcull_info_changed();
        void clear_zcull_stats(u32 type);
        void check_zcull_status(bool framebuffer_swap);
        void get_zcull_stats(u32 type, vm::addr_t sink);
        u32 copy_zcull_stats(u32 memory_range_start, u32 memory_range, u32 destination);

        void enable_conditional_rendering(vm::addr_t ref);
        void disable_conditional_rendering();
        virtual void begin_conditional_rendering(const std::vector<reports::occlusion_query_info*>& sources);
        virtual void end_conditional_rendering();

        // sync
        void sync();
        flags32_t read_barrier(u32 memory_address, u32 memory_range, bool unconditional);
        virtual void sync_hint(FIFO_hint hint, void* args);

        gsl::span<const gsl::byte> get_raw_index_array(const draw_clause& draw_indexed_clause) const;

        std::variant<draw_array_command, draw_indexed_array_command, draw_inlined_array>
        get_draw_command(const rsx::rsx_state& state) const;

        /**
         * Immediate mode rendering requires a temp push buffer to hold attrib values.
         * Appends a value to the push buffer (currently only supports 32-wide types).
         */
        void append_to_push_buffer(u32 attribute, u32 size, u32 subreg_index, vertex_base_type type, u32 value);
        u32 get_push_buffer_vertex_count() const;

        void append_array_element(u32 index);
        u32 get_push_buffer_index_count() const;

    protected:

        /**
         * Computes VRAM requirements needed to upload raw vertex streams.
         * result.first contains persistent memory requirements.
         * result.second contains volatile memory requirements.
         */
        std::pair<u32, u32> calculate_memory_requirements(const vertex_input_layout& layout, u32 first_vertex, u32 vertex_count);

        /**
         * Generates vertex input descriptors as an array of 16x4 s32s
         */
        void fill_vertex_layout_state(const vertex_input_layout& layout, u32 first_vertex, u32 vertex_count, s32* buffer, u32 persistent_offset = 0, u32 volatile_offset = 0);

        /**
         * Uploads vertex data described in the layout descriptor.
         * Copies from local memory to the write-only output buffers provided in a sequential manner.
         */
        void write_vertex_data_to_memory(const vertex_input_layout& layout, u32 first_vertex, u32 vertex_count, void *persistent_data, void *volatile_data);

    private:
        shared_mutex m_mtx_task;

        void handle_emu_flip(u32 buffer);
        void handle_invalidated_memory_range();

    public:
        //std::future<void> add_internal_task(std::function<bool()> callback);
        //void invoke(std::function<bool()> callback);

        /**
         * Fill buffer with 4x4 scale offset matrix.
         * Vertex shader's position is to be multiplied by this matrix.
         * If flip_y is set, the matrix is modified to use the D3D convention.
         */
        void fill_scale_offset_data(void *buffer, bool flip_y) const;

        /**
         * Fill buffer with user clip information
         */
        void fill_user_clip_data(void *buffer) const;

        /**
         * Fill buffer with vertex program constants.
         * Buffer must be at least 512 float4 wide.
         */
        void fill_vertex_program_constants_data(void *buffer);

        /**
         * Fill buffer with fragment rasterization state.
         * Fills current fog values, alpha test parameters and texture scaling parameters.
         */
        void fill_fragment_state_buffer(void *buffer, const RSXFragmentProgram &fragment_program);

        /**
         * Fill buffer with fragment texture parameter constants (texture matrix)
         */
        void fill_fragment_texture_parameters(void *buffer, const RSXFragmentProgram &fragment_program);

        /**
         * Notify that a section of memory has been mapped.
         * If there is a notify_memory_unmapped request on this range yet to be handled,
         * handles it immediately.
         */
        void on_notify_memory_mapped(u32 address_base, u32 size);

        /**
         * Notify that a section of memory has been unmapped.
         * Any data held in the defined range is discarded.
         */
        void on_notify_memory_unmapped(u32 address_base, u32 size);

        /**
         * Notify to check internal state during semaphore wait
         */
        virtual void on_semaphore_acquire_wait() {}

        /**
         * Copy rtt values to buffer.
         * TODO: It's more efficient to combine multiple calls of this function into one.
         */
        virtual std::array<std::vector<std::byte>, 4> copy_render_targets_to_memory()
        {
            return std::array<std::vector<std::byte>, 4>();
        }

        /**
         * Copy depth and stencil content to buffers.
         * TODO: It's more efficient to combine multiple calls of this function into one.
         */
        virtual std::array<std::vector<std::byte>, 2> copy_depth_stencil_buffer_to_memory()
        {
            return std::array<std::vector<std::byte>, 2>();
        }

        virtual std::pair<std::string, std::string> get_programs() const { return std::make_pair("", ""); }

        virtual bool scaled_image_from_memory(blit_src_info& /*src_info*/, blit_dst_info& /*dst_info*/, bool /*interpolate*/) { return false; }

    public:
        void reset();
        void init(u32 ctrlAddress);

        tiled_region get_tiled_address(u32 offset, u32 location);
        GcmTileInfo *find_tile(u32 offset, u32 location);

        // Emu App/Game flip, only flips immediately when called from the RSX thread
        void request_emu_flip(u32 buffer);

        void pause();
        void unpause();
        void wait_pause();

        // Get approximate RSX load in %
        u32 get_load();

        // Returns true if the current thread is the active RSX thread
        bool is_current_thread() const { return std::this_thread::get_id() == m_rsx_thread; }
    };

    inline thread* get_current_renderer()
    {
        return g_fxo->get<rsx::thread>();
    }
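
    // Usage sketch (illustrative): the active renderer is fetched through the
    // fixed-object manager, e.g.
    //
    //   if (const auto render = rsx::get_current_renderer())
    //   {
    //       render->pause();
    //   }
    //
    // Whether a null result is possible depends on g_fxo lifetime rules
    // (assumption), so callers outside the emulation lifetime should check.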
}