#pragma once #include #include #include #include "rsx_utils.h" #include #include #include namespace rsx { class thread; static inline std::string_view location_tostring(u32 location) { ensure(location < 2); const char* location_names[] = { "CELL_GCM_LOCATION_LOCAL", "CELL_GCM_LOCATION_MAIN" }; return location_names[location]; } static inline u32 classify_location(u32 address) { return (address >= rsx::constants::local_mem_base) ? CELL_GCM_LOCATION_LOCAL : CELL_GCM_LOCATION_MAIN; } namespace reports { struct occlusion_query_info { u32 driver_handle; u32 result; u32 num_draws; u32 data_type; u64 sync_tag; u64 timestamp; bool pending; bool active; bool owned; }; struct queued_report_write { u32 type = CELL_GCM_ZPASS_PIXEL_CNT; u32 counter_tag; occlusion_query_info* query; queued_report_write* forwarder; vm::addr_t sink; // Memory location of the report std::vector sink_alias; // Aliased memory addresses }; struct query_search_result { bool found; u32 raw_zpass_result; std::vector queries; }; struct query_stat_counter { u32 result; u32 reserved; }; struct sync_hint_payload_t { occlusion_query_info* query; vm::addr_t address; void* other_params; }; struct MMIO_page_data_t : public rsx::ref_counted { utils::protection prot = utils::protection::rw; }; enum sync_control { sync_none = 0, sync_defer_copy = 1, // If set, return a zcull intr code instead of forcefully reading zcull data sync_no_notify = 2 // If set, backend hint notifications will not be made }; class ZCULL_control { private: std::unordered_map m_locked_pages[2]; atomic_t m_pages_accessed[2] = { false, false }; atomic_t m_critical_reports_in_flight = { 0 }; shared_mutex m_pages_mutex; void on_report_enqueued(vm::addr_t address); void on_report_completed(vm::addr_t address); void disable_optimizations(class ::rsx::thread* ptimer, u32 location); protected: // Delay before a report update operation is forced to retire const u32 max_zcull_delay_us = 300; const u32 min_zcull_tick_us = 100; // Number of occlusion query slots available. Real hardware actually has far fewer units before choking const u32 occlusion_query_count = 1024; const u32 max_safe_queue_depth = 892; bool unit_enabled = false; // The ZCULL unit is on bool write_enabled = false; // A surface in the ZCULL-monitored tile region has been loaded for rasterization bool stats_enabled = false; // Collecting of ZCULL statistics is enabled (not same as pixels passing Z test!) bool zpass_count_enabled = false; // Collecting of ZPASS statistics is enabled. If this is off, the counter does not increment bool host_queries_active = false; // The backend/host is gathering Z data for the ZCULL unit std::array m_occlusion_query_data = {}; std::stack m_free_occlusion_pool{}; occlusion_query_info* m_current_task = nullptr; u32 m_statistics_tag_id = 0; // Scheduling clock. Granunlarity is min_zcull_tick value. u64 m_tsc = 0; u64 m_next_tsc = 0; // Incremental tag used for tracking sync events. Hardware clock resolution is too low for the job. u64 m_sync_tag = 0; u64 m_timer = 0; std::vector m_pending_writes{}; std::unordered_map m_statistics_map{}; // Enables/disables the ZCULL unit void set_active(class ::rsx::thread* ptimer, bool state, bool flush_queue); // Checks current state of the unit and applies changes void check_state(class ::rsx::thread* ptimer, bool flush_queue); // Sets up a new query slot and sets it to the current task void allocate_new_query(class ::rsx::thread* ptimer); // Free a query slot in use void free_query(occlusion_query_info* query); // Write report to memory void write(vm::addr_t sink, u64 timestamp, u32 type, u32 value); void write(queued_report_write* writer, u64 timestamp, u32 value); // Retire operation void retire(class ::rsx::thread* ptimer, queued_report_write* writer, u32 result); public: ZCULL_control(); virtual ~ZCULL_control(); ZCULL_control(const ZCULL_control&) = delete; ZCULL_control& operator=(const ZCULL_control&) = delete; void set_enabled(class ::rsx::thread* ptimer, bool state, bool flush_queue = false); void set_status(class ::rsx::thread* ptimer, bool surface_active, bool zpass_active, bool zcull_stats_active, bool flush_queue = false); // Read current zcull statistics into the address provided void read_report(class ::rsx::thread* ptimer, vm::addr_t sink, u32 type); // Clears current stat block and increments stat_tag_id void clear(class ::rsx::thread* ptimer, u32 type); // Forcefully flushes all void sync(class ::rsx::thread* ptimer); // Conditionally sync any pending writes if range overlaps flags32_t read_barrier(class ::rsx::thread* ptimer, u32 memory_address, u32 memory_range, flags32_t flags); flags32_t read_barrier(class ::rsx::thread* ptimer, u32 memory_address, occlusion_query_info* query); // Call once every 'tick' to update, optional address provided to partially sync until address is processed void update(class ::rsx::thread* ptimer, u32 sync_address = 0, bool hint = false); // Draw call notification void on_draw(); // Sync hint notification void on_sync_hint(sync_hint_payload_t payload); // Check for pending writes bool has_pending() const { return !m_pending_writes.empty(); } // Search for query synchronized at address query_search_result find_query(vm::addr_t sink_address, bool all); // Copies queries in range rebased from source range to destination range u32 copy_reports_to(u32 start, u32 range, u32 dest); // Check paging issues bool on_access_violation(u32 address); // Optimization check bool is_query_result_urgent(u32 address) const { return m_pages_accessed[rsx::classify_location(address)]; } // Backend methods (optional, will return everything as always visible by default) virtual void begin_occlusion_query(occlusion_query_info* /*query*/) {} virtual void end_occlusion_query(occlusion_query_info* /*query*/) {} virtual bool check_occlusion_query_status(occlusion_query_info* /*query*/) { return true; } virtual void get_occlusion_query_result(occlusion_query_info* query) { query->result = -1; } virtual void discard_occlusion_query(occlusion_query_info* /*query*/) {} }; // Helper class for conditional rendering struct conditional_render_eval { bool enabled = false; bool eval_failed = false; bool hw_cond_active = false; bool reserved = false; std::vector eval_sources; u64 eval_sync_tag = 0; u32 eval_address = 0; // Resets common data void reset(); // Returns true if rendering is disabled as per conditional render test bool disable_rendering() const; // Returns true if a conditional render is active but not yet evaluated bool eval_pending() const; // Enable conditional rendering void enable_conditional_render(thread* pthr, u32 address); // Disable conditional rendering void disable_conditional_render(thread* pthr); // Sets data sources for predicate evaluation void set_eval_sources(std::vector& sources); // Sets evaluation result. Result is true if conditional evaluation failed void set_eval_result(thread* pthr, bool failed); // Evaluates the condition by accessing memory directly void eval_result(thread* pthr); }; } }