// rpcsx/rpcs3/Emu/RSX/RSXThread.h
//
// Last commit: kd-11 6e9392fb45 "rsx: Restructure ZCULL query triggers" (2020-02-26 19:40:35 +03:00)
//
// - Both ZCULL stats and ZPASS stats require hardware queries, but
//   ZCULL stats should not contribute to ZPASS stats and vice versa!
//
// - Disables hardware queries for ZCULL stats on their own; we cannot
//   generate them correctly anyway, and no game has yet been found that
//   actually uses them. This should lessen the load on the backend for
//   games that do not require them.

#pragma once
#include <deque>
#include <variant>
#include <stack>
#include "GCM.h"
#include "rsx_cache.h"
#include "RSXFIFO.h"
#include "RSXTexture.h"
#include "RSXOffload.h"
#include "RSXVertexProgram.h"
#include "RSXFragmentProgram.h"
#include "rsx_methods.h"
#include "rsx_utils.h"
#include "Overlays/overlays.h"
#include "Common/texture_cache_utils.h"
#include "Utilities/Thread.h"
#include "Utilities/geometry.h"
#include "Capture/rsx_trace.h"
#include "Capture/rsx_replay.h"
#include "Emu/Cell/lv2/sys_rsx.h"
#include "Emu/IdManager.h"
extern u64 get_guest_system_time();
extern u64 get_system_time();
extern bool user_asked_for_frame_capture;
extern bool capture_current_frame;
extern rsx::frame_trace_data frame_debug;
extern rsx::frame_capture_data frame_capture;
namespace rsx
{
struct rsx_iomap_table
{
std::array<atomic_t<u32>, 4096> ea;
std::array<atomic_t<u32>, 4096> io;
rsx_iomap_table() noexcept
{
std::fill(ea.begin(), ea.end(), -1);
std::fill(io.begin(), io.end(), -1);
}
// Try to get the real address given a mapped address
// Returns -1 on failure
u32 get_addr(u32 offs) const noexcept
{
return this->ea[offs >> 20] | (offs & 0xFFFFF);
}
};
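// Usage sketch (illustrative, not from the original header): translating a
// mapped IO offset back to a guest effective address. `render` stands in for
// any rsx::thread pointer; a result with all bits set means the 1MB page
// holding the offset is not mapped.
//
//   const u32 ea = render->iomap_table.get_addr(io_offset);
//   if (ea == static_cast<u32>(-1))
//   {
//       // unmapped - fail the access
//   }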
enum framebuffer_creation_context : u8
{
context_draw = 0,
context_clear_color = 1,
context_clear_depth = 2,
context_clear_all = context_clear_color | context_clear_depth
};
enum pipeline_state : u32
{
fragment_program_dirty = 0x1, // Fragment program changed
vertex_program_dirty = 0x2, // Vertex program changed
fragment_state_dirty = 0x4, // Fragment state changed (alpha test, etc)
vertex_state_dirty = 0x8, // Vertex state changed (scale_offset, clip planes, etc)
transform_constants_dirty = 0x10, // Transform constants changed
fragment_constants_dirty = 0x20, // Fragment constants changed
framebuffer_reads_dirty = 0x40, // Framebuffer contents changed
fragment_texture_state_dirty = 0x80, // Fragment texture parameters changed
vertex_texture_state_dirty = 0x100, // Vertex texture parameters changed
scissor_config_state_dirty = 0x200, // Scissor region changed
scissor_setup_invalid = 0x400, // Scissor configuration is broken
scissor_setup_clipped = 0x800, // Scissor region is cropped by viewport constraint
invalidate_pipeline_bits = fragment_program_dirty | vertex_program_dirty,
memory_barrier_bits = framebuffer_reads_dirty,
all_dirty = ~0u
};
enum FIFO_state : u8
{
running = 0,
empty = 1, // PUT == GET
spinning = 2, // Puller continuously jumps to self addr (synchronization technique)
nop = 3, // Puller is processing a NOP command
lock_wait = 4 // Puller is processing a lock acquire
};
enum FIFO_hint : u8
{
hint_conditional_render_eval = 1,
hint_zcull_sync = 2
};
enum result_flags: u8
{
result_none = 0,
result_error = 1,
result_zcull_intr = 2
};
u32 get_vertex_type_size_on_host(vertex_base_type type, u32 size);
// TODO: Replace with std::source_location in C++20
u32 get_address(u32 offset, u32 location, const char* from);
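// Call-site sketch (assumption): the `from` string is used for error
// reporting, so callers typically pass the HERE macro to tag the call site.
// CELL_GCM_LOCATION_LOCAL is the standard GCM location constant for VRAM.
//
//   const u32 addr = rsx::get_address(offset, CELL_GCM_LOCATION_LOCAL, HERE);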
struct tiled_region
{
u32 address;
u32 base;
GcmTileInfo *tile;
u8 *ptr;
void write(const void *src, u32 width, u32 height, u32 pitch);
void read(void *dst, u32 width, u32 height, u32 pitch);
};
struct vertex_array_buffer
{
rsx::vertex_base_type type;
u8 attribute_size;
u8 stride;
gsl::span<const std::byte> data;
u8 index;
bool is_be;
};
struct vertex_array_register
{
rsx::vertex_base_type type;
u8 attribute_size;
std::array<u32, 4> data;
u8 index;
};
struct empty_vertex_array
{
u8 index;
};
struct draw_array_command
{
u32 __dummy;
};
struct draw_indexed_array_command
{
gsl::span<const std::byte> raw_index_buffer;
};
struct draw_inlined_array
{
u32 __dummy;
u32 __dummy2;
};
struct interleaved_attribute_t
{
u8 index;
bool modulo;
u16 frequency;
};
struct interleaved_range_info
{
bool interleaved = false;
bool single_vertex = false;
u32 base_offset = 0;
u32 real_offset_address = 0;
u8 memory_location = 0;
u8 attribute_stride = 0;
rsx::simple_array<interleaved_attribute_t> locations;
// Check if we need to upload a full unoptimized range, i.e. [0, max_index]
std::pair<u32, u32> calculate_required_range(u32 first, u32 count) const
{
if (single_vertex)
{
return { 0, 1 };
}
const u32 max_index = (first + count) - 1;
u32 _max_index = 0;
u32 _min_index = first;
for (const auto &attrib : locations)
{
if (attrib.frequency <= 1) [[likely]]
{
_max_index = max_index;
}
else
{
if (attrib.modulo)
{
if (max_index >= attrib.frequency)
{
// Actually uses the modulo operator, cannot safely optimize
_min_index = 0;
_max_index = std::max<u32>(_max_index, attrib.frequency - 1);
}
else
{
// Same as having no modulo
_max_index = max_index;
}
}
else
{
// Division operator
_min_index = std::min(_min_index, first / attrib.frequency);
_max_index = std::max<u32>(_max_index, max_index / attrib.frequency);
}
}
}
verify(HERE), _max_index >= _min_index;
return { _min_index, (_max_index - _min_index) + 1 };
}
};
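// Worked example (illustrative): with first = 0, count = 100 and a single
// attribute using the division operator at frequency = 4, indices 0..99 only
// ever fetch attribute element index / 4, so calculate_required_range()
// collapses the upload to { 0, (99 / 4) + 1 } = { 0, 25 }. With modulo
// addressing at the same frequency it collapses to { 0, 4 } instead, since
// only elements 0..3 are ever addressed.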
enum attribute_buffer_placement : u8
{
none = 0,
persistent = 1,
transient = 2
};
struct vertex_input_layout
{
std::vector<interleaved_range_info> interleaved_blocks; // Interleaved blocks to be uploaded as-is
std::vector<std::pair<u8, u32>> volatile_blocks; // Volatile data blocks (immediate draw vertex data for example)
rsx::simple_array<u8> referenced_registers; // Volatile register data
std::array<attribute_buffer_placement, 16> attribute_placement;
vertex_input_layout()
{
attribute_placement.fill(attribute_buffer_placement::none);
}
void clear()
{
interleaved_blocks.clear();
volatile_blocks.clear();
referenced_registers.clear();
}
bool validate() const
{
// Criteria: At least one array stream has to be defined to feed vertex positions
// This stream cannot be a const register as the vertices cannot create a zero-area primitive
if (!interleaved_blocks.empty() && interleaved_blocks.front().attribute_stride != 0)
return true;
if (!volatile_blocks.empty())
return true;
for (u8 index = 0; index < limits::vertex_count; ++index)
{
switch (attribute_placement[index])
{
case attribute_buffer_placement::transient:
{
// Ignore register reference
if (std::find(referenced_registers.begin(), referenced_registers.end(), index) != referenced_registers.end())
continue;
// The source is inline array or immediate draw push buffer
return true;
}
case attribute_buffer_placement::persistent:
{
return true;
}
case attribute_buffer_placement::none:
{
continue;
}
default:
{
fmt::throw_exception("Unreachable" HERE);
}
}
}
return false;
}
u32 calculate_interleaved_memory_requirements(u32 first_vertex, u32 vertex_count) const
{
u32 mem = 0;
for (auto &block : interleaved_blocks)
{
const auto range = block.calculate_required_range(first_vertex, vertex_count);
mem += range.second * block.attribute_stride;
}
return mem;
}
};
struct framebuffer_layout
{
u16 width;
u16 height;
std::array<u32, 4> color_addresses;
std::array<u32, 4> color_pitch;
std::array<u32, 4> actual_color_pitch;
std::array<bool, 4> color_write_enabled;
u32 zeta_address;
u32 zeta_pitch;
u32 actual_zeta_pitch;
bool zeta_write_enabled;
rsx::surface_target target;
rsx::surface_color_format color_format;
rsx::surface_depth_format depth_format;
rsx::surface_antialiasing aa_mode;
u32 aa_factors[2];
bool depth_float;
bool ignore_change;
};
namespace reports
{
struct occlusion_query_info
{
u32 driver_handle;
u32 result;
u32 num_draws;
u32 data_type;
u64 sync_tag;
u64 timestamp;
bool pending;
bool active;
bool owned;
};
struct queued_report_write
{
u32 type = CELL_GCM_ZPASS_PIXEL_CNT;
u32 counter_tag;
occlusion_query_info* query;
queued_report_write* forwarder;
vm::addr_t sink; // Memory location of the report
std::vector<vm::addr_t> sink_alias; // Aliased memory addresses
};
struct query_search_result
{
bool found;
u32 raw_zpass_result;
std::vector<occlusion_query_info*> queries;
};
enum sync_control
{
sync_none = 0,
sync_defer_copy = 1, // If set, return a zcull intr code instead of forcefully reading zcull data
sync_no_notify = 2 // If set, backend hint notifications will not be made
};
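// Interplay sketch (assumption, based on the flags above, rsx::result_flags,
// and ZCULL_control::read_barrier below): a caller that cannot afford a
// blocking readback passes sync_defer_copy and reacts to the intr code.
//
//   const flags32_t res = zcull_ctrl->read_barrier(this, address, range,
//       sync_defer_copy | sync_no_notify);
//   if (res & rsx::result_zcull_intr)
//   {
//       // schedule a deferred ZCULL read instead of stalling here
//   }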
class ZCULL_control
{
protected:
// Delay before a report update operation is forced to retire
const u32 max_zcull_delay_us = 300;
const u32 min_zcull_tick_us = 100;
// Number of occlusion query slots available. Real hardware has far fewer query units before it starts to choke
const u32 occlusion_query_count = 1024;
const u32 max_safe_queue_depth = 892;
bool unit_enabled = false; // The ZCULL unit is on
bool write_enabled = false; // A surface in the ZCULL-monitored tile region has been loaded for rasterization
bool stats_enabled = false; // Collecting of ZCULL statistics is enabled (not same as pixels passing Z test!)
bool zpass_count_enabled = false; // Collecting of ZPASS statistics is enabled. If this is off, the counter does not increment
bool host_queries_active = false; // The backend/host is gathering Z data for the ZCULL unit
std::array<occlusion_query_info, 1024> m_occlusion_query_data = {};
std::stack<occlusion_query_info*> m_free_occlusion_pool;
occlusion_query_info* m_current_task = nullptr;
u32 m_statistics_tag_id = 0;
// Scheduling clock. Granularity is the min_zcull_tick_us value.
u64 m_tsc = 0;
u64 m_next_tsc = 0;
// Incremental tag used for tracking sync events. Hardware clock resolution is too low for the job.
u64 m_sync_tag = 0;
u64 m_timer = 0;
std::vector<queued_report_write> m_pending_writes;
std::unordered_map<u32, u32> m_statistics_map;
// Enables/disables the ZCULL unit
void set_active(class ::rsx::thread* ptimer, bool active, bool flush_queue);
// Checks current state of the unit and applies changes
void check_state(class ::rsx::thread* ptimer, bool flush_queue);
// Sets up a new query slot and sets it to the current task
void allocate_new_query(class ::rsx::thread* ptimer);
// Free a query slot in use
void free_query(occlusion_query_info* query);
// Write report to memory
void write(vm::addr_t sink, u64 timestamp, u32 type, u32 value);
void write(queued_report_write* writer, u64 timestamp, u32 value);
public:
ZCULL_control();
~ZCULL_control();
void set_enabled(class ::rsx::thread* ptimer, bool state, bool flush_queue = false);
void set_status(class ::rsx::thread* ptimer, bool surface_active, bool zpass_active, bool zcull_stats_active, bool flush_queue = false);
// Read current zcull statistics into the address provided
void read_report(class ::rsx::thread* ptimer, vm::addr_t sink, u32 type);
// Clears current stat block and increments stat_tag_id
void clear(class ::rsx::thread* ptimer);
// Forcefully flushes all pending report writes
void sync(class ::rsx::thread* ptimer);
// Conditionally sync any pending writes if range overlaps
flags32_t read_barrier(class ::rsx::thread* ptimer, u32 memory_address, u32 memory_range, flags32_t flags);
// Call once every 'tick' to update, optional address provided to partially sync until address is processed
void update(class ::rsx::thread* ptimer, u32 sync_address = 0, bool hint = false);
// Draw call notification
void on_draw();
// Sync hint notification
void on_sync_hint(void* args);
// Check for pending writes
bool has_pending() const { return !m_pending_writes.empty(); }
// Search for query synchronized at address
query_search_result find_query(vm::addr_t sink_address, bool all);
// Copies report writes within the source range to the destination range, rebasing their sink addresses
u32 copy_reports_to(u32 start, u32 range, u32 dest);
// Backend methods (optional, will return everything as always visible by default)
virtual void begin_occlusion_query(occlusion_query_info* /*query*/) {}
virtual void end_occlusion_query(occlusion_query_info* /*query*/) {}
virtual bool check_occlusion_query_status(occlusion_query_info* /*query*/) { return true; }
virtual void get_occlusion_query_result(occlusion_query_info* query) { query->result = UINT32_MAX; }
virtual void discard_occlusion_query(occlusion_query_info* /*query*/) {}
};
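// Backend sketch (illustrative, not the original implementation): a renderer
// specializes ZCULL_control by overriding the virtual occlusion query hooks;
// the defaults above simply report every query as visible.
//
//   class example_zcull_control final : public ZCULL_control
//   {
//       void begin_occlusion_query(occlusion_query_info* q) override
//       {
//           // begin a hardware query associated with q->driver_handle
//       }
//       void end_occlusion_query(occlusion_query_info* q) override
//       {
//           // close the hardware query
//       }
//       bool check_occlusion_query_status(occlusion_query_info* q) override
//       {
//           return true; // poll the backend; placeholder
//       }
//       void get_occlusion_query_result(occlusion_query_info* q) override
//       {
//           q->result = 0; // read back the samples-passed count; placeholder
//       }
//   };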
// Helper class for conditional rendering
struct conditional_render_eval
{
bool enabled = false;
bool eval_failed = false;
bool hw_cond_active = false;
bool reserved = false;
std::vector<occlusion_query_info*> eval_sources;
u64 eval_sync_tag = 0;
u32 eval_address = 0;
// Resets common data
void reset();
// Returns true if rendering is disabled as per conditional render test
bool disable_rendering() const;
// Returns true if a conditional render is active but not yet evaluated
bool eval_pending() const;
// Enable conditional rendering
void enable_conditional_render(thread* pthr, u32 address);
// Disable conditional rendering
void disable_conditional_render(thread* pthr);
// Sets data sources for predicate evaluation
void set_eval_sources(std::vector<occlusion_query_info*>& sources);
// Sets evaluation result. Result is true if conditional evaluation failed
void set_eval_result(thread* pthr, bool failed);
// Evaluates the condition by accessing memory directly
void eval_result(thread* pthr);
};
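// Flow sketch (inferred from the interface above): enable the predicate at a
// report address, feed it the queries covering that address, then evaluate.
//
//   cond_render_ctrl.enable_conditional_render(this, report_address);
//   cond_render_ctrl.set_eval_sources(queries); // e.g. from ZCULL_control::find_query
//   if (cond_render_ctrl.eval_pending())
//       cond_render_ctrl.eval_result(this);     // reads the report from memory
//   if (cond_render_ctrl.disable_rendering())
//       return;                                 // the draw is culled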
}
struct frame_statistics_t
{
u32 draw_calls;
s64 setup_time;
s64 vertex_upload_time;
s64 textures_upload_time;
s64 draw_exec_time;
s64 flip_time;
};
struct display_flip_info_t
{
std::deque<u32> buffer_queue;
u32 buffer;
bool skip_frame;
bool emu_flip;
bool in_progress;
frame_statistics_t stats;
inline void push(u32 _buffer)
{
buffer_queue.push_back(_buffer);
}
inline bool pop(u32 _buffer)
{
if (buffer_queue.empty())
{
return false;
}
do
{
const auto index = buffer_queue.front();
buffer_queue.pop_front();
if (index == _buffer)
{
buffer = _buffer;
return true;
}
}
while (!buffer_queue.empty());
// Need to observe this happening in the wild
rsx_log.error("Display queue was discarded while not empty!");
return false;
}
};
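// Usage sketch (illustrative): the flip requester queues buffer indices with
// push(), and the presenter claims one with pop(), which discards any stale
// entries queued ahead of the requested buffer.
//
//   flip_info.push(buffer_index);        // on flip request
//   ...
//   if (flip_info.pop(buffer_index))     // on present
//   {
//       // flip_info.buffer now holds the claimed index
//   }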
struct backend_configuration
{
bool supports_multidraw; // Draw call batching
bool supports_hw_a2c; // Alpha to coverage
bool supports_hw_renormalization; // Should be true on NV hardware which matches PS3 texture renormalization behaviour
bool supports_hw_a2one; // Alpha to one
bool supports_hw_conditional_render; // Conditional render
};
struct sampled_image_descriptor_base;
class thread
{
u64 timestamp_ctrl = 0;
u64 timestamp_subvalue = 0;
display_flip_info_t m_queued_flip{};
protected:
std::thread::id m_rsx_thread;
atomic_t<bool> m_rsx_thread_exiting{ true };
std::array<push_buffer_vertex_info, 16> vertex_push_buffers;
std::vector<u32> element_push_buffer;
s32 m_skip_frame_ctr = 0;
bool skip_current_frame = false;
frame_statistics_t stats{};
backend_configuration backend_config{};
// FIFO
std::unique_ptr<FIFO::FIFO_control> fifo_ctrl;
FIFO::flattening_helper m_flattener;
u32 fifo_ret_addr = RSX_CALL_STACK_EMPTY;
u32 saved_fifo_ret = RSX_CALL_STACK_EMPTY;
// Occlusion query
bool zcull_surface_active = false;
std::unique_ptr<reports::ZCULL_control> zcull_ctrl;
// Framebuffer setup
rsx::gcm_framebuffer_info m_surface_info[rsx::limits::color_buffers_count];
rsx::gcm_framebuffer_info m_depth_surface_info;
framebuffer_layout m_framebuffer_layout;
bool framebuffer_status_valid = false;
// Overlays
rsx::overlays::display_manager* m_overlay_manager = nullptr;
// Invalidated memory range
address_range m_invalidated_memory_range;
// Profiler
rsx::profiling_timer m_profiler;
frame_statistics_t m_frame_stats;
public:
RsxDmaControl* ctrl = nullptr;
rsx_iomap_table iomap_table;
u32 restore_point = 0;
atomic_t<u32> external_interrupt_lock{ 0 };
atomic_t<bool> external_interrupt_ack{ false };
void flush_fifo();
void recover_fifo();
static void fifo_wake_delay(u64 div = 1);
u32 get_fifo_cmd() const;
// Performance approximation counters
struct
{
atomic_t<u64> idle_time{ 0 }; // Time spent idling in microseconds
u64 last_update_timestamp = 0; // Timestamp of last load update
u64 FIFO_idle_timestamp = 0; // Timestamp of when FIFO queue becomes idle
FIFO_state state = FIFO_state::running;
u32 approximate_load = 0;
u32 sampled_frames = 0;
}
performance_counters;
enum class flip_request : u32
{
emu_requested = 1,
native_ui = 2,
any = emu_requested | native_ui
};
atomic_bitmask_t<flip_request> async_flip_requested{};
u8 async_flip_buffer{ 0 };
GcmTileInfo tiles[limits::tiles_count];
GcmZcullInfo zculls[limits::zculls_count];
void capture_frame(const std::string &name);
public:
std::shared_ptr<named_thread<class ppu_thread>> intr_thread;
// I hate this flag, but until HLE is closer to LLE, it's needed
bool isHLE{ false };
u32 flip_status;
int debug_level;
atomic_t<bool> requested_vsync{false};
atomic_t<bool> enable_second_vhandler{false};
RsxDisplayInfo display_buffers[8];
u32 display_buffers_count{0};
u32 current_display_buffer{0};
u32 device_addr;
u32 label_addr;
u32 main_mem_size{0};
u32 local_mem_size{0};
bool m_rtts_dirty;
bool m_textures_dirty[16];
bool m_vertex_textures_dirty[4];
bool m_framebuffer_state_contested = false;
rsx::framebuffer_creation_context m_current_framebuffer_context = rsx::framebuffer_creation_context::context_draw;
u32 m_graphics_state = 0;
u64 ROP_sync_timestamp = 0;
program_hash_util::fragment_program_utils::fragment_program_metadata current_fp_metadata = {};
program_hash_util::vertex_program_utils::vertex_program_metadata current_vp_metadata = {};
protected:
std::array<u32, 4> get_color_surface_addresses() const;
u32 get_zeta_surface_address() const;
void get_framebuffer_layout(rsx::framebuffer_creation_context context, framebuffer_layout &layout);
bool get_scissor(areau& region, bool clip_viewport);
/**
* Analyze vertex inputs and group all interleaved blocks
*/
void analyse_inputs_interleaved(vertex_input_layout&) const;
RSXVertexProgram current_vertex_program = {};
RSXFragmentProgram current_fragment_program = {};
void get_current_vertex_program(const std::array<std::unique_ptr<rsx::sampled_image_descriptor_base>, rsx::limits::vertex_textures_count>& sampler_descriptors, bool skip_textures = false, bool skip_vertex_inputs = true);
/**
* Gets current fragment program and associated fragment state
* get_surface_info is a helper that takes two parameters (rsx_texture_address and surface_is_depth)
* and returns whether the surface is a render target and its pitch in native format
*/
void get_current_fragment_program(const std::array<std::unique_ptr<rsx::sampled_image_descriptor_base>, rsx::limits::fragment_textures_count>& sampler_descriptors);
public:
double fps_limit = 59.94;
public:
u64 start_rsx_time = 0;
u64 int_flip_index = 0;
u64 last_flip_time = 0;
vm::ptr<void(u32)> flip_handler = vm::null;
vm::ptr<void(u32)> user_handler = vm::null;
vm::ptr<void(u32)> vblank_handler = vm::null;
atomic_t<u64> vblank_count{0};
public:
bool invalid_command_interrupt_raised = false;
bool sync_point_request = false;
bool in_begin_end = false;
atomic_t<s32> async_tasks_pending{ 0 };
bool zcull_stats_enabled = false;
bool zcull_rendering_enabled = false;
bool zcull_pixel_cnt_enabled = false;
reports::conditional_render_eval cond_render_ctrl;
void operator()();
virtual u64 get_cycles() = 0;
virtual ~thread();
static constexpr auto thread_name = "rsx::thread"sv;
protected:
thread();
virtual void on_task();
virtual void on_exit();
/**
* Execute a backend local task queue
*/
virtual void do_local_task(FIFO_state state);
virtual void on_decompiler_init() {}
virtual void on_decompiler_exit() {}
virtual bool on_decompiler_task() { return false; }
virtual void emit_geometry(u32) {}
void run_FIFO();
public:
virtual void clear_surface(u32 /*arg*/) {}
virtual void begin();
virtual void end();
virtual void execute_nop_draw();
virtual void on_init_rsx() = 0;
virtual void on_init_thread() = 0;
virtual void on_frame_end(u32 buffer, bool forced = false);
virtual void flip(const display_flip_info_t& info) = 0;
virtual u64 timestamp();
virtual bool on_access_violation(u32 /*address*/, bool /*is_writing*/) { return false; }
virtual void on_invalidate_memory_range(const address_range & /*range*/, rsx::invalidation_cause) {}
virtual void notify_tile_unbound(u32 /*tile*/) {}
// control
virtual void renderctl(u32 /*request_code*/, void* /*args*/) {}
// zcull
void notify_zcull_info_changed();
void clear_zcull_stats(u32 type);
void check_zcull_status(bool framebuffer_swap);
void get_zcull_stats(u32 type, vm::addr_t sink);
u32 copy_zcull_stats(u32 memory_range_start, u32 memory_range, u32 destination);
void enable_conditional_rendering(vm::addr_t ref);
void disable_conditional_rendering();
virtual void begin_conditional_rendering(const std::vector<reports::occlusion_query_info*>& sources);
virtual void end_conditional_rendering();
// sync
void sync();
flags32_t read_barrier(u32 memory_address, u32 memory_range, bool unconditional);
virtual void sync_hint(FIFO_hint hint, void* args);
gsl::span<const gsl::byte> get_raw_index_array(const draw_clause& draw_indexed_clause) const;
std::variant<draw_array_command, draw_indexed_array_command, draw_inlined_array>
get_draw_command(const rsx::rsx_state& state) const;
/**
* Immediate mode rendering requires a temp push buffer to hold attrib values
* Appends a value to the push buffer (currently only supports 32-bit wide types)
*/
void append_to_push_buffer(u32 attribute, u32 size, u32 subreg_index, vertex_base_type type, u32 value);
u32 get_push_buffer_vertex_count() const;
void append_array_element(u32 index);
u32 get_push_buffer_index_count() const;
protected:
/**
* Computes VRAM requirements needed to upload raw vertex streams
* result.first contains persistent memory requirements
* result.second contains volatile memory requirements
*/
std::pair<u32, u32> calculate_memory_requirements(const vertex_input_layout& layout, u32 first_vertex, u32 vertex_count);
/**
* Generates vertex input descriptors as an array of 16x4 s32s
*/
void fill_vertex_layout_state(const vertex_input_layout& layout, u32 first_vertex, u32 vertex_count, s32* buffer, u32 persistent_offset = 0, u32 volatile_offset = 0);
/**
* Uploads vertex data described in the layout descriptor
* Copies from local memory to the write-only output buffers provided in a sequential manner
*/
void write_vertex_data_to_memory(const vertex_input_layout& layout, u32 first_vertex, u32 vertex_count, void *persistent_data, void *volatile_data);
private:
shared_mutex m_mtx_task;
void handle_emu_flip(u32 buffer);
void handle_invalidated_memory_range();
public:
//std::future<void> add_internal_task(std::function<bool()> callback);
//void invoke(std::function<bool()> callback);
/**
* Fill buffer with 4x4 scale offset matrix.
* Vertex shader's position is to be multiplied by this matrix.
* If flip_y is set, the matrix is modified to use the D3D convention.
*/
void fill_scale_offset_data(void *buffer, bool flip_y) const;
/**
* Fill buffer with user clip information
*/
void fill_user_clip_data(void *buffer) const;
/**
* Fill buffer with vertex program constants.
* Buffer must be at least 512 float4 wide.
*/
void fill_vertex_program_constants_data(void *buffer);
/**
* Fill buffer with fragment rasterization state.
* Fills current fog values, alpha test parameters and texture scaling parameters
*/
void fill_fragment_state_buffer(void *buffer, const RSXFragmentProgram &fragment_program);
/**
* Fill buffer with fragment texture parameter constants (texture matrix)
*/
void fill_fragment_texture_parameters(void *buffer, const RSXFragmentProgram &fragment_program);
/**
* Notify that a section of memory has been mapped
* If there is a notify_memory_unmapped request on this range yet to be handled,
* handles it immediately.
*/
void on_notify_memory_mapped(u32 address_base, u32 size);
/**
* Notify that a section of memory has been unmapped
* Any data held in the defined range is discarded
*/
void on_notify_memory_unmapped(u32 address_base, u32 size);
/**
* Notify to check internal state during semaphore wait
*/
virtual void on_semaphore_acquire_wait() {}
/**
* Copy rtt values to buffer.
* TODO: It's more efficient to combine multiple calls of this function into one.
*/
virtual std::array<std::vector<std::byte>, 4> copy_render_targets_to_memory() {
return std::array<std::vector<std::byte>, 4>();
}
/**
* Copy depth and stencil content to buffers.
* TODO: It's more efficient to combine multiple calls of this function into one.
*/
virtual std::array<std::vector<std::byte>, 2> copy_depth_stencil_buffer_to_memory() {
return std::array<std::vector<std::byte>, 2>();
}
virtual std::pair<std::string, std::string> get_programs() const { return std::make_pair("", ""); }
virtual bool scaled_image_from_memory(blit_src_info& /*src_info*/, blit_dst_info& /*dst_info*/, bool /*interpolate*/) { return false; }
public:
void reset();
void init(u32 ctrlAddress);
tiled_region get_tiled_address(u32 offset, u32 location);
GcmTileInfo *find_tile(u32 offset, u32 location);
// Emu App/Game flip; only flips immediately when called from the RSX thread
void request_emu_flip(u32 buffer);
void pause();
void unpause();
void wait_pause();
// Get RSX approximate load in %
u32 get_load();
// Returns true if the current thread is the active RSX thread
bool is_current_thread() const { return std::this_thread::get_id() == m_rsx_thread; }
};
inline thread* get_current_renderer()
{
return g_fxo->get<rsx::thread>();
}
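// Usage sketch (illustrative): fetch the active renderer through the fixed
// object manager and check thread identity before taking a blocking path.
//
//   if (auto* render = rsx::get_current_renderer())
//   {
//       if (!render->is_current_thread())
//       {
//           render->wait_pause(); // hypothetical call site
//       }
//   }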
}