#include "stdafx.h"
#include "RSXThread.h"

#include "Emu/Cell/PPUCallback.h"
#include "Emu/Cell/SPUThread.h"
#include "Emu/Cell/timers.hpp"

#include "Common/BufferUtils.h"
#include "Common/buffer_stream.hpp"
#include "Common/texture_cache.h"
#include "Common/surface_store.h"
#include "Common/time.hpp"
#include "Capture/rsx_capture.h"
#include "rsx_methods.h"
#include "gcm_printing.h"
#include "RSXDisAsm.h"
#include "Emu/Cell/lv2/sys_event.h"
#include "Emu/Cell/lv2/sys_time.h"
#include "Emu/Cell/Modules/cellGcmSys.h"
#include "Overlays/overlay_perf_metrics.h"
#include "Overlays/overlay_message.h"
#include "Program/GLSLCommon.h"
#include "Utilities/date_time.h"
#include "Utilities/StrUtil.h"

#include "util/serialization.hpp"
#include "util/asm.hpp"

#include <span>
#include <sstream>
#include <thread>
#include <unordered_set>
#include <cfenv>
class GSRender;

#define CMD_DEBUG 0

atomic_t<bool> g_user_asked_for_frame_capture = false;
atomic_t<bool> g_disable_frame_limit = false;

rsx::frame_trace_data frame_debug;
rsx::frame_capture_data frame_capture;

extern CellGcmOffsetTable offsetTable;
extern thread_local std::string(*g_tls_log_prefix)();
extern atomic_t<u32> g_lv2_preempts_taken;

LOG_CHANNEL(perf_log, "PERF");
template <>
bool serialize<rsx::rsx_state>(utils::serial& ar, rsx::rsx_state& o)
{
	ar(o.transform_program);

	// Workaround for old RSX captures: both RSX captures and savestates call this method,
	// but transform constants should only be grabbed for savestate captures.
	const bool is_savestate_capture = thread_ctrl::get_current() && thread_ctrl::get_name() == "Emu State Capture Thread";

	if (GET_SERIALIZATION_VERSION(global_version) || is_savestate_capture)
	{
		ar(o.transform_constants);
	}

	return ar(o.registers);
}
template <>
bool serialize<rsx::frame_capture_data>(utils::serial& ar, rsx::frame_capture_data& o)
{
	ar(o.magic, o.version, o.LE_format);

	if (o.magic != rsx::c_fc_magic || o.version != rsx::c_fc_version || o.LE_format != u32{std::endian::little == std::endian::native})
	{
		return false;
	}

	return ar(o.tile_map, o.memory_map, o.memory_data_map, o.display_buffers_map, o.replay_commands, o.reg_state);
}

template <>
bool serialize<rsx::frame_capture_data::memory_block_data>(utils::serial& ar, rsx::frame_capture_data::memory_block_data& o)
{
	return ar(o.data);
}

template <>
bool serialize<rsx::frame_capture_data::replay_command>(utils::serial& ar, rsx::frame_capture_data::replay_command& o)
{
	return ar(o.rsx_command, o.memory_state, o.tile_state, o.display_buffer_state);
}
template <>
bool serialize<rsx::rsx_iomap_table>(utils::serial& ar, rsx::rsx_iomap_table& o)
{
	// We do not need more than that
	ar(std::span(o.ea.data(), 512));

	if (!ar.is_writing())
	{
		// Populate o.io
		for (const atomic_t<u32>& ea_addr : o.ea)
		{
			const u32& addr = ea_addr.raw();

			if (addr != umax)
			{
				o.io[addr >> 20].raw() = static_cast<u32>(&ea_addr - o.ea.data()) << 20;
			}
		}
	}

	return true;
}
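
// Note on serialize<rsx::rsx_iomap_table> above: o.io is rebuilt as the inverse of o.ea, so only
// one table needs to be stored. Illustrative example with hypothetical values: if IO page 3 maps
// to EA 0x0a000000 (o.ea[3] == 0x0a000000), the loop sets o.io[0x0a000000 >> 20] = 3 << 20,
// restoring the EA -> IO direction of the mapping on load.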
namespace rsx
{
	std::function<bool(u32 addr, bool is_writing)> g_access_violation_handler;

	rsx_iomap_table::rsx_iomap_table() noexcept
		: ea(fill_array(-1))
		, io(fill_array(-1))
	{
	}
	u32 get_address(u32 offset, u32 location, u32 size_to_check, u32 line, u32 col, const char* file, const char* func)
	{
		const auto render = get_current_renderer();
		std::string_view msg;

		switch (location)
		{
		case CELL_GCM_CONTEXT_DMA_MEMORY_FRAME_BUFFER:
		case CELL_GCM_LOCATION_LOCAL:
		{
			if (offset < render->local_mem_size && render->local_mem_size - offset >= size_to_check)
			{
				return rsx::constants::local_mem_base + offset;
			}

			msg = "Local RSX offset out of range!"sv;
			break;
		}

		case CELL_GCM_CONTEXT_DMA_MEMORY_HOST_BUFFER:
		case CELL_GCM_LOCATION_MAIN:
		{
			if (const u32 ea = render->iomap_table.get_addr(offset); ea + 1)
			{
				if (!size_to_check || vm::check_addr(ea, 0, size_to_check))
				{
					return ea;
				}
			}

			msg = "RSXIO memory not mapped!"sv;
			break;
		}

		case CELL_GCM_CONTEXT_DMA_REPORT_LOCATION_LOCAL:
		{
			if (offset < sizeof(RsxReports::report) /*&& (offset % 0x10) == 0*/)
			{
				return render->label_addr + ::offset32(&RsxReports::report) + offset;
			}

			msg = "Local RSX REPORT offset out of range!"sv;
			break;
		}

		case CELL_GCM_CONTEXT_DMA_REPORT_LOCATION_MAIN:
		{
			if (const u32 ea = offset < 0x1000000 ? render->iomap_table.get_addr(0x0e000000 + offset) : -1; ea + 1)
			{
				if (!size_to_check || vm::check_addr(ea, 0, size_to_check))
				{
					return ea;
				}
			}

			msg = "RSXIO REPORT memory not mapped!"sv;
			break;
		}

		// These are handled elsewhere for the methods that target them, so it's unexpected for them to be passed here
		case CELL_GCM_CONTEXT_DMA_TO_MEMORY_GET_NOTIFY0:
		case CELL_GCM_CONTEXT_DMA_TO_MEMORY_GET_NOTIFY1:
		case CELL_GCM_CONTEXT_DMA_TO_MEMORY_GET_NOTIFY2:
		case CELL_GCM_CONTEXT_DMA_TO_MEMORY_GET_NOTIFY3:
		case CELL_GCM_CONTEXT_DMA_TO_MEMORY_GET_NOTIFY4:
		case CELL_GCM_CONTEXT_DMA_TO_MEMORY_GET_NOTIFY5:
		case CELL_GCM_CONTEXT_DMA_TO_MEMORY_GET_NOTIFY6:
		case CELL_GCM_CONTEXT_DMA_TO_MEMORY_GET_NOTIFY7:
			msg = "CELL_GCM_CONTEXT_DMA_TO_MEMORY_GET_NOTIFYx"sv; break;

		case CELL_GCM_CONTEXT_DMA_NOTIFY_MAIN_0:
		case CELL_GCM_CONTEXT_DMA_NOTIFY_MAIN_1:
		case CELL_GCM_CONTEXT_DMA_NOTIFY_MAIN_2:
		case CELL_GCM_CONTEXT_DMA_NOTIFY_MAIN_3:
		case CELL_GCM_CONTEXT_DMA_NOTIFY_MAIN_4:
		case CELL_GCM_CONTEXT_DMA_NOTIFY_MAIN_5:
		case CELL_GCM_CONTEXT_DMA_NOTIFY_MAIN_6:
		case CELL_GCM_CONTEXT_DMA_NOTIFY_MAIN_7:
			msg = "CELL_GCM_CONTEXT_DMA_NOTIFY_MAIN_x"sv; break;

		case CELL_GCM_CONTEXT_DMA_SEMAPHORE_RW:
		case CELL_GCM_CONTEXT_DMA_SEMAPHORE_R:
		{
			if (offset < sizeof(RsxReports::semaphore) /*&& (offset % 0x10) == 0*/)
			{
				return render->label_addr + offset;
			}

			msg = "DMA SEMAPHORE offset out of range!"sv;
			break;
		}

		case CELL_GCM_CONTEXT_DMA_DEVICE_RW:
		case CELL_GCM_CONTEXT_DMA_DEVICE_R:
		{
			if (offset < 0x100000 /*&& (offset % 0x10) == 0*/)
			{
				return render->device_addr + offset;
			}

			// TODO: What happens here? It could wrap around, access other segments of RSX internal memory, etc.
			// Or it can simply throw an access violation error
			msg = "DMA DEVICE offset out of range!"sv;
			break;
		}

		default:
		{
			msg = "Invalid location!"sv;
			break;
		}
		}

		if (size_to_check)
		{
			// Allow failure when a size was specified, so the caller can recover accurately
			rsx_log.warning("rsx::get_address(offset=0x%x, location=0x%x, size=0x%x): %s%s", offset, location, size_to_check, msg, src_loc{line, col, file, func});
			return 0;
		}

		fmt::throw_exception("rsx::get_address(offset=0x%x, location=0x%x): %s%s", offset, location, msg, src_loc{line, col, file, func});
	}
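
	// Illustrative usage of get_address above (values are hypothetical):
	//   rsx::get_address(0x1000, CELL_GCM_LOCATION_LOCAL)    -> rsx::constants::local_mem_base + 0x1000
	//   rsx::get_address(0x2000, CELL_GCM_LOCATION_MAIN, 16) -> translated EA, or 0 if the RSXIO
	//   page is unmapped; passing a size makes the failure recoverable instead of fatal.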
	extern void set_rsx_yield_flag() noexcept
	{
		if (auto rsx = get_current_renderer())
		{
			if (g_cfg.core.allow_rsx_cpu_preempt)
			{
				rsx->state += cpu_flag::yield;
			}
		}
	}

	extern void set_native_ui_flip()
	{
		if (auto rsxthr = rsx::get_current_renderer())
		{
			rsxthr->async_flip_requested |= rsx::thread::flip_request::native_ui;
		}
	}
	std::pair<u32, u32> interleaved_range_info::calculate_required_range(u32 first, u32 count) const
	{
		if (single_vertex)
		{
			return { 0, 1 };
		}

		const u32 max_index = (first + count) - 1;
		u32 _max_index = 0;
		u32 _min_index = first;

		for (const auto& attrib : locations)
		{
			if (attrib.frequency <= 1) [[likely]]
			{
				_max_index = max_index;
			}
			else
			{
				if (attrib.modulo)
				{
					if (max_index >= attrib.frequency)
					{
						// Actually uses the modulo operator
						_min_index = 0;
						_max_index = attrib.frequency - 1;
					}
					else
					{
						// Same as having no modulo
						_max_index = max_index;
					}
				}
				else
				{
					// Division operator
					_min_index = std::min(_min_index, first / attrib.frequency);
					_max_index = std::max<u32>(_max_index, utils::aligned_div(max_index, attrib.frequency));
				}
			}
		}

		ensure(_max_index >= _min_index);
		return { _min_index, (_max_index - _min_index) + 1 };
	}
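
	// Worked example for calculate_required_range (illustrative): with first=4 and count=8 the
	// highest referenced index is 11. An attribute with frequency=4 and modulo set only ever
	// reads indices 0..3, while one with frequency=4 using division reads indices
	// 4/4 .. aligned_div(11, 4), i.e. 1..3. The returned pair covers the union over all attributes.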
	u32 get_vertex_type_size_on_host(vertex_base_type type, u32 size)
	{
		switch (type)
		{
		case vertex_base_type::s1:
		case vertex_base_type::s32k:
			switch (size)
			{
			case 1:
			case 2:
			case 4:
				return sizeof(u16) * size;
			case 3:
				return sizeof(u16) * 4;
			default:
				break;
			}
			fmt::throw_exception("Wrong vector size");
		case vertex_base_type::f: return sizeof(f32) * size;
		case vertex_base_type::sf:
			switch (size)
			{
			case 1:
			case 2:
			case 4:
				return sizeof(f16) * size;
			case 3:
				return sizeof(f16) * 4;
			default:
				break;
			}
			fmt::throw_exception("Wrong vector size");
		case vertex_base_type::ub:
			switch (size)
			{
			case 1:
			case 2:
			case 4:
				return sizeof(u8) * size;
			case 3:
				return sizeof(u8) * 4;
			default:
				break;
			}
			fmt::throw_exception("Wrong vector size");
		case vertex_base_type::cmp: return 4;
		case vertex_base_type::ub256: ensure(size == 4); return sizeof(u8) * 4;
		default:
			break;
		}
		fmt::throw_exception("RSXVertexData::GetTypeSize: Bad vertex data type (%d)!", static_cast<u8>(type));
	}
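
	// Example for get_vertex_type_size_on_host (illustrative): 3-component attributes are padded
	// to 4 components on the host, so an s1 vector of size 3 occupies sizeof(u16) * 4 = 8 bytes.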
	void tiled_region::write(const void* src, u32 width, u32 height, u32 pitch)
	{
		if (!tile)
		{
			memcpy(ptr, src, height * pitch);
			return;
		}

		u32 offset_x = base % tile->pitch;
		u32 offset_y = base / tile->pitch;

		switch (tile->comp)
		{
		case CELL_GCM_COMPMODE_C32_2X1:
		case CELL_GCM_COMPMODE_DISABLED:
			for (u32 y = 0; y < height; ++y)
			{
				memcpy(ptr + (offset_y + y) * tile->pitch + offset_x, static_cast<const u8*>(src) + pitch * y, pitch);
			}
			break;
		/*
		case CELL_GCM_COMPMODE_C32_2X1:
			for (u32 y = 0; y < height; ++y)
			{
				for (u32 x = 0; x < width; ++x)
				{
					u32 value = *(u32*)((u8*)src + pitch * y + x * sizeof(u32));

					*(u32*)(ptr + (offset_y + y) * tile->pitch + offset_x + (x * 2 + 0) * sizeof(u32)) = value;
					*(u32*)(ptr + (offset_y + y) * tile->pitch + offset_x + (x * 2 + 1) * sizeof(u32)) = value;
				}
			}
			break;
		*/
		case CELL_GCM_COMPMODE_C32_2X2:
			for (u32 y = 0; y < height; ++y)
			{
				for (u32 x = 0; x < width; ++x)
				{
					u32 value = *reinterpret_cast<const u32*>(static_cast<const u8*>(src) + pitch * y + x * sizeof(u32));

					*reinterpret_cast<u32*>(ptr + (offset_y + y * 2 + 0) * tile->pitch + offset_x + (x * 2 + 0) * sizeof(u32)) = value;
					*reinterpret_cast<u32*>(ptr + (offset_y + y * 2 + 0) * tile->pitch + offset_x + (x * 2 + 1) * sizeof(u32)) = value;
					*reinterpret_cast<u32*>(ptr + (offset_y + y * 2 + 1) * tile->pitch + offset_x + (x * 2 + 0) * sizeof(u32)) = value;
					*reinterpret_cast<u32*>(ptr + (offset_y + y * 2 + 1) * tile->pitch + offset_x + (x * 2 + 1) * sizeof(u32)) = value;
				}
			}
			break;
		default:
			::narrow(tile->comp);
		}
	}
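
	// Note on tiled_region::write above: in C32_2X2 compression mode each source texel is
	// replicated into a 2x2 block, so e.g. a 2x2 source region expands to a 4x4 footprint in
	// tiled memory (illustrative; the disabled C32_2X1 path would replicate 2x1 instead).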
	void tiled_region::read(void* dst, u32 width, u32 height, u32 pitch)
	{
		if (!tile)
		{
			memcpy(dst, ptr, height * pitch);
			return;
		}

		u32 offset_x = base % tile->pitch;
		u32 offset_y = base / tile->pitch;

		switch (tile->comp)
		{
		case CELL_GCM_COMPMODE_C32_2X1:
		case CELL_GCM_COMPMODE_DISABLED:
			for (u32 y = 0; y < height; ++y)
			{
				memcpy(static_cast<u8*>(dst) + pitch * y, ptr + (offset_y + y) * tile->pitch + offset_x, pitch);
			}
			break;
		/*
		case CELL_GCM_COMPMODE_C32_2X1:
			for (u32 y = 0; y < height; ++y)
			{
				for (u32 x = 0; x < width; ++x)
				{
					u32 value = *(u32*)(ptr + (offset_y + y) * tile->pitch + offset_x + (x * 2 + 0) * sizeof(u32));

					*(u32*)((u8*)dst + pitch * y + x * sizeof(u32)) = value;
				}
			}
			break;
		*/
		case CELL_GCM_COMPMODE_C32_2X2:
			for (u32 y = 0; y < height; ++y)
			{
				for (u32 x = 0; x < width; ++x)
				{
					u32 value = *reinterpret_cast<u32*>(ptr + (offset_y + y * 2 + 0) * tile->pitch + offset_x + (x * 2 + 0) * sizeof(u32));

					*reinterpret_cast<u32*>(static_cast<u8*>(dst) + pitch * y + x * sizeof(u32)) = value;
				}
			}
			break;
		default:
			::narrow(tile->comp);
		}
	}
	thread::~thread()
	{
		g_access_violation_handler = nullptr;
	}
	void thread::save(utils::serial& ar)
	{
		[[maybe_unused]] const s32 version = GET_OR_USE_SERIALIZATION_VERSION(ar.is_writing(), rsx);

		ar(rsx::method_registers);

		for (auto& v : vertex_push_buffers)
		{
			ar(v.attr, v.size, v.type, v.vertex_count, v.dword_count, v.data);
		}

		ar(element_push_buffer, fifo_ret_addr, saved_fifo_ret, zcull_surface_active, m_surface_info, m_depth_surface_info, m_framebuffer_layout);
		ar(dma_address, iomap_table, restore_point, tiles, zculls, display_buffers, display_buffers_count, current_display_buffer);
		ar(enable_second_vhandler, requested_vsync);
		ar(device_addr, label_addr, main_mem_size, local_mem_size, rsx_event_port, driver_info);
		ar(in_begin_end);
		ar(display_buffers, display_buffers_count, current_display_buffer);
		ar(unsent_gcm_events, rsx::method_registers.current_draw_clause);

		if (ar.is_writing())
		{
			if (fifo_ctrl && state & cpu_flag::again)
			{
				ar(fifo_ctrl->get_remaining_args_count() + 1);
				ar(fifo_ctrl->last_cmd());
			}
			else
			{
				ar(u32{0});
			}
		}
		else if (version > 1)
		{
			if (u32 count = ar)
			{
				restore_fifo_count = count;
				ar(restore_fifo_cmd);
			}
		}
	}
	thread::thread(utils::serial* _ar)
		: cpu_thread(0x5555'5555)
	{
		g_access_violation_handler = [this](u32 address, bool is_writing)
		{
			return on_access_violation(address, is_writing);
		};

		m_rtts_dirty = true;
		m_textures_dirty.fill(true);
		m_vertex_textures_dirty.fill(true);

		m_graphics_state = pipeline_state::all_dirty;

		g_user_asked_for_frame_capture = false;

		if (g_cfg.misc.use_native_interface && (g_cfg.video.renderer == video_renderer::opengl || g_cfg.video.renderer == video_renderer::vulkan))
		{
			m_overlay_manager = g_fxo->init<rsx::overlays::display_manager>(0);
		}

		state -= cpu_flag::stop + cpu_flag::wait; // TODO: Remove workaround

		if (!_ar)
		{
			return;
		}

		serialized = true;
		save(*_ar);

		if (dma_address)
		{
			ctrl = vm::_ptr<RsxDmaControl>(dma_address);
			m_rsx_thread_exiting = false;
		}

		if (g_cfg.savestate.start_paused)
		{
			// Render a whole frame within this emulation session first, so there won't be missing graphics
			m_pause_after_x_flips = 2;
		}
	}
	avconf::avconf(utils::serial& ar)
	{
		ar(*this);
	}

	void avconf::save(utils::serial& ar)
	{
		ar(*this);
	}
	void thread::capture_frame(const std::string& name)
	{
		frame_trace_data::draw_state draw_state{};

		draw_state.programs = get_programs();
		draw_state.name = name;
		frame_debug.draw_calls.emplace_back(std::move(draw_state));
	}
	void thread::begin()
	{
		if (cond_render_ctrl.hw_cond_active)
		{
			if (!cond_render_ctrl.eval_pending())
			{
				// End conditional rendering if still active
				end_conditional_rendering();
			}

			// If hw cond render is enabled and evaluation is still pending, do nothing
		}
		else if (cond_render_ctrl.eval_pending())
		{
			// Evaluate conditional rendering test or enable hw cond render until results are available
			if (backend_config.supports_hw_conditional_render)
			{
				// In this mode, it is possible to skip the cond render while the backend is still processing data.
				// The backend guarantees that any draw calls emitted during this time will NOT generate any ROP writes
				ensure(!cond_render_ctrl.hw_cond_active);

				// Pending evaluation, use hardware test
				begin_conditional_rendering(cond_render_ctrl.eval_sources);
			}
			else
			{
				// NOTE: eval_sources list is reversed with newest query first
				zcull_ctrl->read_barrier(this, cond_render_ctrl.eval_address, cond_render_ctrl.eval_sources.front());
				ensure(!cond_render_ctrl.eval_pending());
			}
		}

		in_begin_end = true;
	}
	void thread::append_to_push_buffer(u32 attribute, u32 size, u32 subreg_index, vertex_base_type type, u32 value)
	{
		if (!(rsx::method_registers.vertex_attrib_input_mask() & (1 << attribute)))
		{
			return;
		}

		// Enforce ATTR0 as vertex attribute for push buffers.
		// This whole thing becomes a mess if we don't have a provoking attribute.
		const auto vertex_id = vertex_push_buffers[0].get_vertex_id();
		vertex_push_buffers[attribute].set_vertex_data(attribute, vertex_id, subreg_index, type, size, value);
		m_graphics_state |= rsx::pipeline_state::push_buffer_arrays_dirty;
	}

	u32 thread::get_push_buffer_vertex_count() const
	{
		// Enforce ATTR0 as vertex attribute for push buffers.
		// This whole thing becomes a mess if we don't have a provoking attribute.
		return vertex_push_buffers[0].vertex_count;
	}

	void thread::append_array_element(u32 index)
	{
		// Endianness is swapped because common upload code expects input in BE
		// TODO: Implement fast upload path for LE inputs and do away with this
		element_push_buffer.push_back(std::bit_cast<u32, be_t<u32>>(index));
	}

	u32 thread::get_push_buffer_index_count() const
	{
		return ::size32(element_push_buffer);
	}
	void thread::end()
	{
		if (capture_current_frame)
		{
			capture::capture_draw_memory(this);
		}

		in_begin_end = false;
		m_frame_stats.draw_calls++;

		method_registers.current_draw_clause.post_execute_cleanup();

		m_graphics_state |= rsx::pipeline_state::framebuffer_reads_dirty;
		m_eng_interrupt_mask |= rsx::backend_interrupt;
		ROP_sync_timestamp = rsx::get_shared_tag();

		if (m_graphics_state & rsx::pipeline_state::push_buffer_arrays_dirty)
		{
			for (auto& push_buf : vertex_push_buffers)
			{
				//Disabled, see https://github.com/RPCS3/rpcs3/issues/1932
				//rsx::method_registers.register_vertex_info[index].size = 0;

				push_buf.clear();
			}

			m_graphics_state &= ~rsx::pipeline_state::push_buffer_arrays_dirty;
		}

		element_push_buffer.clear();

		zcull_ctrl->on_draw();

		if (capture_current_frame)
		{
			u32 element_count = rsx::method_registers.current_draw_clause.get_elements_count();
			capture_frame(fmt::format("Draw %s %d", rsx::method_registers.current_draw_clause.primitive, element_count));
		}
	}
	void thread::execute_nop_draw()
	{
		method_registers.current_draw_clause.begin();
		do
		{
			method_registers.current_draw_clause.execute_pipeline_dependencies();
		}
		while (method_registers.current_draw_clause.next());
	}
	void thread::cpu_task()
	{
		while (Emu.IsReady())
		{
			thread_ctrl::wait_for(1000);
		}

		on_task();
		on_exit();
	}
	void thread::cpu_wait(bs_t<cpu_flag> old)
	{
		if (external_interrupt_lock)
		{
			wait_pause();
		}

		if ((state & (cpu_flag::dbg_global_pause + cpu_flag::exit)) == cpu_flag::dbg_global_pause)
		{
			// Wait 16ms during emulation pause. This reduces cpu load while still giving us the chance to render overlays.
			do_local_task(rsx::FIFO_state::paused);
			thread_ctrl::wait_on(state, old, 16000);
		}
		else
		{
			on_semaphore_acquire_wait();
			std::this_thread::yield();
		}
	}
	void thread::post_vblank_event(u64 post_event_time)
	{
		vblank_count++;

		if (isHLE)
		{
			if (auto ptr = vblank_handler)
			{
				intr_thread->cmd_list
				({
					{ ppu_cmd::set_args, 1 }, u64{1},
					{ ppu_cmd::lle_call, ptr },
					{ ppu_cmd::sleep, 0 }
				});

				intr_thread->cmd_notify++;
				intr_thread->cmd_notify.notify_one();
			}
		}
		else
		{
			sys_rsx_context_attribute(0x55555555, 0xFED, 1, get_guest_system_time(post_event_time), 0, 0);
		}
	}

	namespace nv4097
	{
		void set_render_mode(thread* rsx, u32, u32 arg);
	}
	void thread::on_task()
	{
		g_tls_log_prefix = []
		{
			const auto rsx = get_current_renderer();
			return fmt::format("RSX [0x%07x]", rsx->ctrl ? +rsx->ctrl->get : 0);
		};

		if (!serialized) method_registers.init();

		rsx::overlays::reset_performance_overlay();

		g_fxo->get<rsx::dma_manager>().init();
		on_init_thread();

		is_inited = true;
		is_inited.notify_all();

		if (!zcull_ctrl)
		{
			// Backend did not provide an implementation, provide NULL object
			zcull_ctrl = std::make_unique<::rsx::reports::ZCULL_control>();
		}

		check_zcull_status(false);
		nv4097::set_render_mode(this, 0, method_registers.registers[NV4097_SET_RENDER_ENABLE]);

		performance_counters.state = FIFO_state::empty;

		const u64 event_flags = unsent_gcm_events.exchange(0);

		Emu.CallFromMainThread([]{ Emu.RunPPU(); });

		// Wait for startup (TODO)
		while (m_rsx_thread_exiting || Emu.IsPaused())
		{
			// Execute backend-local tasks first
			do_local_task(performance_counters.state);

			// Update sub-units
			zcull_ctrl->update(this);

			if (is_stopped())
			{
				return;
			}

			thread_ctrl::wait_for(1000);
		}

		performance_counters.state = FIFO_state::running;

		fifo_ctrl = std::make_unique<::rsx::FIFO::FIFO_control>(this);
		fifo_ctrl->set_get(ctrl->get);

		last_guest_flip_timestamp = rsx::uclock() - 1000000;

		vblank_count = 0;

		if (restore_fifo_count)
		{
			fifo_ctrl->restore_state(restore_fifo_cmd, restore_fifo_count);
		}

		if (!send_event(0, event_flags, 0))
		{
			return;
		}

		g_fxo->init<named_thread>("VBlank Thread", [this]()
		{
			// See sys_timer_usleep for details
#ifdef __linux__
			constexpr u32 host_min_quantum = 50;
#else
			constexpr u32 host_min_quantum = 500;
#endif

			u64 start_time = get_system_time();

			u64 vblank_rate = g_cfg.video.vblank_rate;
			u64 vblank_period = 1'000'000 + u64{g_cfg.video.vblank_ntsc.get()} * 1000;

			u64 local_vblank_count = 0;

			// TODO: exit condition
			while (!is_stopped() && !unsent_gcm_events && thread_ctrl::state() != thread_state::aborting)
			{
				// Get current time
				const u64 current = get_system_time();

				// Calculate the time at which we need to send a new VBLANK signal
				const u64 post_event_time = start_time + (local_vblank_count + 1) * vblank_period / vblank_rate;
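
				// Worked example (illustrative): with vblank_rate = 60 and a vblank_period of
				// 1'000'000 us, event N is due at start_time + (N + 1) * 1'000'000 / 60 us,
				// i.e. one signal roughly every 16.6 ms.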
				// Calculate time remaining to that time (0 if we passed it)
				const u64 wait_for = current >= post_event_time ? 0 : post_event_time - current;

				// Subtract the host operating system's minimum sleep quantum to get the sleep time
				const u64 wait_sleep = wait_for - u64{wait_for >= host_min_quantum} * host_min_quantum;

				if (!wait_for)
				{
					{
						local_vblank_count++;

						if (local_vblank_count == vblank_rate)
						{
							// Advance start_time to the moment of the current VBLANK
							// Which is the last VBLANK event in this period
							// This is in order for multiplication by ratio above to use only small numbers
							start_time += vblank_period;
							local_vblank_count = 0;

							// We have a rare chance to update settings without losing precision whenever local_vblank_count is 0
							vblank_rate = g_cfg.video.vblank_rate;
							vblank_period = 1'000'000 + u64{g_cfg.video.vblank_ntsc.get()} * 1000;
						}

						post_vblank_event(post_event_time);
					}
				}
				else if (wait_sleep)
				{
					thread_ctrl::wait_for(wait_sleep);
				}
				else if (wait_for >= host_min_quantum / 3 * 2)
				{
					std::this_thread::yield();
				}

				if (Emu.IsPaused())
				{
					// Save the difference before pause
					start_time = rsx::uclock() - start_time;

					while (Emu.IsPaused() && !is_stopped())
					{
						thread_ctrl::wait_for(5'000);
					}

					// Restore difference
					start_time = rsx::uclock() - start_time;
				}
			}
		});

		// Raise priority above other threads
		thread_ctrl::scoped_priority high_prio(+1);

		if (g_cfg.core.thread_scheduler != thread_scheduler_mode::os)
		{
			thread_ctrl::set_thread_affinity_mask(thread_ctrl::get_affinity_mask(thread_class::rsx));
		}

		while (!test_stopped())
		{
			// Wait for external pause events
			if (external_interrupt_lock)
			{
				wait_pause();
			}

			// Note a possible rollback address
			if (sync_point_request && !in_begin_end)
			{
				restore_point = ctrl->get;
				saved_fifo_ret = fifo_ret_addr;
				sync_point_request.release(false);
			}

			// Update sub-units every 64 cycles. The local handler is invoked for other functions externally on-demand anyway.
			// This avoids expensive timestamp checks, which involve reading some values from TLS storage on Windows.
			// If something is going on in the backend that requires an update, set the interrupt bit explicitly.
			if ((m_cycles_counter++ & 63) == 0 || m_eng_interrupt_mask)
			{
				// Execute backend-local tasks first
				do_local_task(performance_counters.state);

				// Update other sub-units
				zcull_ctrl->update(this);
			}

			// Execute FIFO queue
			run_FIFO();
		}
	}
	void thread::on_exit()
	{
		if (zcull_ctrl)
		{
			zcull_ctrl->sync(this);
		}

		// Deregister violation handler
		g_access_violation_handler = nullptr;

		// Clear any pending flush requests to release threads
		std::this_thread::sleep_for(10ms);
		do_local_task(rsx::FIFO_state::lock_wait);

		g_fxo->get<rsx::dma_manager>().join();
		state += cpu_flag::exit;
	}
	void thread::fill_scale_offset_data(void* buffer, bool flip_y) const
	{
		int clip_w = rsx::method_registers.surface_clip_width();
		int clip_h = rsx::method_registers.surface_clip_height();

		float scale_x = rsx::method_registers.viewport_scale_x() / (clip_w / 2.f);
		float offset_x = rsx::method_registers.viewport_offset_x() - (clip_w / 2.f);
		offset_x /= clip_w / 2.f;

		float scale_y = rsx::method_registers.viewport_scale_y() / (clip_h / 2.f);
		float offset_y = (rsx::method_registers.viewport_offset_y() - (clip_h / 2.f));
		offset_y /= clip_h / 2.f;
		if (flip_y) scale_y *= -1;
		if (flip_y) offset_y *= -1;

		float scale_z = rsx::method_registers.viewport_scale_z();
		float offset_z = rsx::method_registers.viewport_offset_z();
		float one = 1.f;

		utils::stream_vector(buffer, std::bit_cast<u32>(scale_x), 0, 0, std::bit_cast<u32>(offset_x));
		utils::stream_vector(static_cast<char*>(buffer) + 16, 0, std::bit_cast<u32>(scale_y), 0, std::bit_cast<u32>(offset_y));
		utils::stream_vector(static_cast<char*>(buffer) + 32, 0, 0, std::bit_cast<u32>(scale_z), std::bit_cast<u32>(offset_z));
		utils::stream_vector(static_cast<char*>(buffer) + 48, 0, 0, 0, std::bit_cast<u32>(one));
	}
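
	// Note on fill_scale_offset_data above: the four stream_vector writes lay out a row-major
	// 4x4 matrix (illustrative):
	//   | sx  0   0  ox |
	//   | 0   sy  0  oy |
	//   | 0   0   sz oz |
	//   | 0   0   0  1  |
	// i.e. a scale/translate transform built from the normalized viewport parameters.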
	void thread::fill_user_clip_data(void* buffer) const
	{
		const rsx::user_clip_plane_op clip_plane_control[6] =
		{
			rsx::method_registers.clip_plane_0_enabled(),
			rsx::method_registers.clip_plane_1_enabled(),
			rsx::method_registers.clip_plane_2_enabled(),
			rsx::method_registers.clip_plane_3_enabled(),
			rsx::method_registers.clip_plane_4_enabled(),
			rsx::method_registers.clip_plane_5_enabled(),
		};

		u8 data_block[64];
		s32* clip_enabled_flags = reinterpret_cast<s32*>(data_block);
		f32* clip_distance_factors = reinterpret_cast<f32*>(data_block + 32);

		for (int index = 0; index < 6; ++index)
		{
			switch (clip_plane_control[index])
			{
			default:
				rsx_log.error("bad clip plane control (0x%x)", static_cast<u8>(clip_plane_control[index]));
				[[fallthrough]];

			case rsx::user_clip_plane_op::disable:
				clip_enabled_flags[index] = 0;
				clip_distance_factors[index] = 0.f;
				break;

			case rsx::user_clip_plane_op::greater_or_equal:
				clip_enabled_flags[index] = 1;
				clip_distance_factors[index] = 1.f;
				break;

			case rsx::user_clip_plane_op::less_than:
				clip_enabled_flags[index] = 1;
				clip_distance_factors[index] = -1.f;
				break;
			}
		}

		memcpy(buffer, data_block, 2 * 8 * sizeof(u32));
	}
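
	// Layout note for fill_user_clip_data above (illustrative): the 64-byte block holds the six
	// s32 enable flags at bytes 0..23 (region padded to 32 bytes) and the matching f32 sign
	// factors at bytes 32..55, letting shaders compute distance * factor when the flag is set.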
	/**
	 * Fill buffer with vertex program constants.
	 * Buffer must be at least 512 float4 wide.
	 */
	void thread::fill_vertex_program_constants_data(void* buffer, const std::vector<u16>& reloc_table)
	{
		if (!reloc_table.empty()) [[likely]]
		{
			char* dst = reinterpret_cast<char*>(buffer);
			for (const auto& index : reloc_table)
			{
				utils::stream_vector_from_memory(dst, &rsx::method_registers.transform_constants[index]);
				dst += 16;
			}
		}
		else
		{
			memcpy(buffer, rsx::method_registers.transform_constants.data(), 468 * 4 * sizeof(float));
		}
	}
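
	// Example for fill_vertex_program_constants_data above (illustrative): a reloc_table of
	// {2, 7} packs only transform constants 2 and 7 into the first two float4 slots of the
	// buffer; an empty table falls back to copying all 468 float4 constants verbatim.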
	void thread::fill_fragment_state_buffer(void* buffer, const RSXFragmentProgram& /*fragment_program*/)
	{
		u32 rop_control = 0u;

		if (rsx::method_registers.alpha_test_enabled())
		{
			const u32 alpha_func = static_cast<u32>(rsx::method_registers.alpha_func());
			rop_control |= (alpha_func << 16);
			rop_control |= ROP_control::alpha_test_enable;
		}

		if (rsx::method_registers.polygon_stipple_enabled())
		{
			rop_control |= ROP_control::polygon_stipple_enable;
		}

		if (rsx::method_registers.msaa_alpha_to_coverage_enabled() && !backend_config.supports_hw_a2c)
		{
			// TODO: Properly support alpha-to-coverage and alpha-to-one behavior in shaders

			// Alpha values generate a coverage mask for order independent blending
			// Requires hardware AA to work properly (or just fragment sample stage in fragment shaders)
			// Simulated using combined alpha blend and alpha test
			if (rsx::method_registers.msaa_sample_mask()) rop_control |= ROP_control::msaa_mask_enable;
			rop_control |= ROP_control::csaa_enable;

			// Sample configuration bits
			switch (rsx::method_registers.surface_antialias())
			{
			case rsx::surface_antialiasing::center_1_sample:
				break;
			case rsx::surface_antialiasing::diagonal_centered_2_samples:
				rop_control |= 1u << 6;
				break;
			default:
				rop_control |= 3u << 6;
				break;
			}
		}

		const f32 fog0 = rsx::method_registers.fog_params_0();
		const f32 fog1 = rsx::method_registers.fog_params_1();
		const u32 fog_mode = static_cast<u32>(rsx::method_registers.fog_equation());

		if (rsx::method_registers.framebuffer_srgb_enabled())
		{
			// Check if framebuffer is actually an XRGB format and not a WZYX format
			switch (rsx::method_registers.surface_color())
			{
			case rsx::surface_color_format::w16z16y16x16:
			case rsx::surface_color_format::w32z32y32x32:
			case rsx::surface_color_format::x32:
				break;
			default:
				rop_control |= ROP_control::framebuffer_srgb_enable;
				break;
			}
		}

		// Generate wpos coefficients
		// wpos equation is now as follows:
		// wpos.y = (frag_coord / resolution_scale) * ((window_origin!=top)?-1.: 1.) + ((window_origin!=top)? window_height : 0)
		// wpos.x = (frag_coord / resolution_scale)
		// wpos.zw = frag_coord.zw
		const auto window_origin = rsx::method_registers.shader_window_origin();
		const u32 window_height = rsx::method_registers.shader_window_height();
		const f32 resolution_scale = (window_height <= static_cast<u32>(g_cfg.video.min_scalable_dimension)) ? 1.f : rsx::get_resolution_scale();
		const f32 wpos_scale = (window_origin == rsx::window_origin::top) ? (1.f / resolution_scale) : (-1.f / resolution_scale);
		const f32 wpos_bias = (window_origin == rsx::window_origin::top) ? 0.f : window_height;
		const f32 alpha_ref = rsx::method_registers.alpha_ref();

		u32* dst = static_cast<u32*>(buffer);
		utils::stream_vector(dst, std::bit_cast<u32>(fog0), std::bit_cast<u32>(fog1), rop_control, std::bit_cast<u32>(alpha_ref));
		utils::stream_vector(dst + 4, 0u, fog_mode, std::bit_cast<u32>(wpos_scale), std::bit_cast<u32>(wpos_bias));
	}
	u64 thread::timestamp()
	{
		const u64 freq = sys_time_get_timebase_frequency();

		auto get_time_ns = [freq]()
		{
			const u64 t = get_timebased_time();
			return (t / freq * 1'000'000'000 + t % freq * 1'000'000'000 / freq);
		};

		const u64 t = get_time_ns();
		if (t != timestamp_ctrl)
		{
			timestamp_ctrl = t;
			timestamp_subvalue = 0;
			return t;
		}

		// Check if we passed the limit for which fixed increments are still legal
		// If so, wait for the next reported time value
		if ((1'000'000'000 / freq) - timestamp_subvalue <= 2)
		{
			u64 now = get_time_ns();

			for (; t == now; now = get_time_ns())
			{
				utils::pause();
			}

			timestamp_ctrl = now;
			timestamp_subvalue = 0;
			return now;
		}

		timestamp_subvalue += 2;
		return t + timestamp_subvalue;
	}
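
	// Note on thread::timestamp above (illustrative reasoning): get_time_ns splits the
	// conversion into t / freq * 1e9 + (t % freq) * 1e9 / freq so the intermediate products
	// stay within u64 range for realistic timebase frequencies. Repeated calls within one
	// timebase tick are kept unique and monotonic via the +2 ns subvalue increments.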
	std::span<const std::byte> thread::get_raw_index_array(const draw_clause& draw_indexed_clause) const
	{
		if (!element_push_buffer.empty())
		{
			// Indices provided via immediate mode
			return { reinterpret_cast<const std::byte*>(element_push_buffer.data()), ::narrow<u32>(element_push_buffer.size() * sizeof(u32)) };
		}

		const rsx::index_array_type type = rsx::method_registers.index_type();
		const u32 type_size = get_index_type_size(type);

		// Force aligned indices as realhw
		const u32 address = (0 - type_size) & get_address(rsx::method_registers.index_array_address(), rsx::method_registers.index_array_location());

		//const bool is_primitive_restart_enabled = rsx::method_registers.restart_index_enabled();
		//const u32 primitive_restart_index = rsx::method_registers.restart_index();

		const u32 first = draw_indexed_clause.min_index();
		const u32 count = draw_indexed_clause.get_elements_count();

		const auto ptr = vm::_ptr<const std::byte>(address);
		return { ptr + first * type_size, count * type_size };
	}
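
	// Note on get_raw_index_array above: (0 - type_size) & address rounds the base down to a
	// multiple of the index size, e.g. a u16 index array at 0x1001 is read from 0x1000,
	// matching real hardware's forced alignment (illustrative example).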
	std::variant<draw_array_command, draw_indexed_array_command, draw_inlined_array>
	thread::get_draw_command(const rsx::rsx_state& state) const
	{
		if (rsx::method_registers.current_draw_clause.command == rsx::draw_command::array)
		{
			return draw_array_command{};
		}

		if (rsx::method_registers.current_draw_clause.command == rsx::draw_command::indexed)
		{
			return draw_indexed_array_command
			{
				get_raw_index_array(state.current_draw_clause)
			};
		}

		if (rsx::method_registers.current_draw_clause.command == rsx::draw_command::inlined_array)
		{
			return draw_inlined_array{};
		}

		fmt::throw_exception("ill-formed draw command");
	}
	void thread::do_local_task(FIFO_state state)
	{
		m_eng_interrupt_mask.clear(rsx::backend_interrupt);

		if (async_flip_requested & flip_request::emu_requested)
		{
			// NOTE: This has to be executed immediately
			// Delaying this operation can cause desync due to the delay in firing the flip event
			handle_emu_flip(async_flip_buffer);
		}

		if (!in_begin_end && state != FIFO_state::lock_wait)
		{
			if (atomic_storage<u32>::load(m_invalidated_memory_range.end) != 0)
			{
				std::lock_guard lock(m_mtx_task);

				if (m_invalidated_memory_range.valid())
				{
					handle_invalidated_memory_range();
				}
			}
		}

		if (m_eng_interrupt_mask & rsx::pipe_flush_interrupt)
		{
			sync();
		}

		if (is_stopped())
		{
			std::lock_guard lock(m_mtx_task);
			m_invalidated_memory_range = utils::address_range::start_end(0x2 << 28, constants::local_mem_base + local_mem_size - 1);
			handle_invalidated_memory_range();
		}
	}
	std::array<u32, 4> thread::get_color_surface_addresses() const
	{
		u32 offset_color[] =
		{
			rsx::method_registers.surface_offset(0),
			rsx::method_registers.surface_offset(1),
			rsx::method_registers.surface_offset(2),
			rsx::method_registers.surface_offset(3),
		};
		u32 context_dma_color[] =
		{
			rsx::method_registers.surface_dma(0),
			rsx::method_registers.surface_dma(1),
			rsx::method_registers.surface_dma(2),
			rsx::method_registers.surface_dma(3),
		};
		return
		{
			rsx::get_address(offset_color[0], context_dma_color[0]),
			rsx::get_address(offset_color[1], context_dma_color[1]),
			rsx::get_address(offset_color[2], context_dma_color[2]),
			rsx::get_address(offset_color[3], context_dma_color[3]),
		};
	}

	u32 thread::get_zeta_surface_address() const
	{
		u32 m_context_dma_z = rsx::method_registers.surface_z_dma();
		u32 offset_zeta = rsx::method_registers.surface_z_offset();
		return rsx::get_address(offset_zeta, m_context_dma_z);
	}
	void thread::get_framebuffer_layout(rsx::framebuffer_creation_context context, framebuffer_layout& layout)
	{
		layout = {};

		layout.ignore_change = true;
		layout.width = rsx::method_registers.surface_clip_width();
		layout.height = rsx::method_registers.surface_clip_height();

		framebuffer_status_valid = false;
		m_framebuffer_state_contested = false;
		m_current_framebuffer_context = context;

		if (layout.width == 0 || layout.height == 0)
		{
			rsx_log.trace("Invalid framebuffer setup, w=%d, h=%d", layout.width, layout.height);
			return;
		}

		//const u16 clip_x = rsx::method_registers.surface_clip_origin_x();
		//const u16 clip_y = rsx::method_registers.surface_clip_origin_y();

		layout.color_addresses = get_color_surface_addresses();
		layout.zeta_address = get_zeta_surface_address();
		layout.zeta_pitch = rsx::method_registers.surface_z_pitch();
		layout.color_pitch =
		{
			rsx::method_registers.surface_pitch(0),
			rsx::method_registers.surface_pitch(1),
			rsx::method_registers.surface_pitch(2),
			rsx::method_registers.surface_pitch(3),
		};

		layout.color_format = rsx::method_registers.surface_color();
		layout.depth_format = rsx::method_registers.surface_depth_fmt();
		layout.target = rsx::method_registers.surface_color_target();

		const auto mrt_buffers = rsx::utility::get_rtt_indexes(layout.target);
		const auto aa_mode = rsx::method_registers.surface_antialias();
		const u32 aa_factor_u = (aa_mode == rsx::surface_antialiasing::center_1_sample) ? 1 : 2;
		const u32 aa_factor_v = (aa_mode == rsx::surface_antialiasing::center_1_sample || aa_mode == rsx::surface_antialiasing::diagonal_centered_2_samples) ? 1 : 2;
		const u8 sample_count = get_format_sample_count(aa_mode);

		const auto depth_texel_size = get_format_block_size_in_bytes(layout.depth_format) * aa_factor_u;
		const auto color_texel_size = get_format_block_size_in_bytes(layout.color_format) * aa_factor_u;
		const bool stencil_test_enabled = is_depth_stencil_format(layout.depth_format) && rsx::method_registers.stencil_test_enabled();
		const bool depth_test_enabled = rsx::method_registers.depth_test_enabled();

		// Check write masks
		layout.zeta_write_enabled = (depth_test_enabled && rsx::method_registers.depth_write_enabled());
		if (!layout.zeta_write_enabled && stencil_test_enabled)
		{
			// Check if stencil data is modified
			auto mask = rsx::method_registers.stencil_mask();
			bool active_write_op = (rsx::method_registers.stencil_op_zpass() != rsx::stencil_op::keep ||
				rsx::method_registers.stencil_op_fail() != rsx::stencil_op::keep ||
				rsx::method_registers.stencil_op_zfail() != rsx::stencil_op::keep);

			if ((!mask || !active_write_op) && rsx::method_registers.two_sided_stencil_test_enabled())
			{
				mask |= rsx::method_registers.back_stencil_mask();
				active_write_op |= (rsx::method_registers.back_stencil_op_zpass() != rsx::stencil_op::keep ||
					rsx::method_registers.back_stencil_op_fail() != rsx::stencil_op::keep ||
					rsx::method_registers.back_stencil_op_zfail() != rsx::stencil_op::keep);
			}

			layout.zeta_write_enabled = (mask && active_write_op);
		}

		// NOTE: surface_target_a is index 1 but is not MRT since only one surface is active
		bool color_write_enabled = false;
		for (uint i = 0; i < mrt_buffers.size(); ++i)
		{
			if (rsx::method_registers.color_write_enabled(i))
			{
				const auto real_index = mrt_buffers[i];
				layout.color_write_enabled[real_index] = true;
				color_write_enabled = true;
			}
		}

		bool depth_buffer_unused = false, color_buffer_unused = false;

		switch (context)
		{
		case rsx::framebuffer_creation_context::context_clear_all:
			break;
		case rsx::framebuffer_creation_context::context_clear_depth:
			color_buffer_unused = true;
			break;
		case rsx::framebuffer_creation_context::context_clear_color:
			depth_buffer_unused = true;
			break;
		case rsx::framebuffer_creation_context::context_draw:
			// NOTE: As with all other hw, depth/stencil writes involve the corresponding depth/stencil test, i.e. no test = no write
			// NOTE: Depth test is not really using the memory if it's set to always or never
			// TODO: Perform similar checks for stencil test
			if (!stencil_test_enabled)
			{
				if (!depth_test_enabled)
				{
					depth_buffer_unused = true;
				}
				else if (!rsx::method_registers.depth_write_enabled())
				{
					// Depth test is enabled but depth write is disabled
					switch (rsx::method_registers.depth_func())
					{
					default:
						break;
					case rsx::comparison_function::never:
					case rsx::comparison_function::always:
						// No access to depth buffer memory
						depth_buffer_unused = true;
						break;
					}
				}

				if (depth_buffer_unused) [[unlikely]]
				{
					// Check if depth bounds is active. Depth bounds test does NOT need depth test to be enabled to access the Z buffer
					// Bind Z buffer in read mode for bounds check in this case
					if (rsx::method_registers.depth_bounds_test_enabled() &&
						(rsx::method_registers.depth_bounds_min() > 0.f || rsx::method_registers.depth_bounds_max() < 1.f))
					{
						depth_buffer_unused = false;
					}
				}
			}

			color_buffer_unused = !color_write_enabled || layout.target == rsx::surface_target::none;
			m_framebuffer_state_contested = color_buffer_unused || depth_buffer_unused;
			break;
		default:
			fmt::throw_exception("Unknown framebuffer context 0x%x", static_cast<u32>(context));
		}

		// Swizzled render does tight packing of bytes
		bool packed_render = false;
		u32 minimum_color_pitch = 64u;
		u32 minimum_zeta_pitch = 64u;

		switch (layout.raster_type = rsx::method_registers.surface_type())
		{
		default:
			rsx_log.error("Unknown raster mode 0x%x", static_cast<u32>(layout.raster_type));
			[[fallthrough]];
		case rsx::surface_raster_type::linear:
			break;
		case rsx::surface_raster_type::swizzle:
			packed_render = true;
			break;
		}

		if (!packed_render)
		{
			// Well, this is a write operation either way (clearing or drawing)
			// We can deduce the minimum pitch this operation is guaranteed to require by checking the lesser of scissor and clip
			const u32 write_limit_x = std::min<u32>(layout.width, rsx::method_registers.scissor_origin_x() + rsx::method_registers.scissor_width());

			minimum_color_pitch = color_texel_size * write_limit_x;
			minimum_zeta_pitch = depth_texel_size * write_limit_x;
		}

		if (depth_buffer_unused)
		{
			layout.zeta_address = 0;
		}
		else if (layout.zeta_pitch < minimum_zeta_pitch)
		{
			layout.zeta_address = 0;
		}
		else if (packed_render)
		{
			layout.actual_zeta_pitch = (layout.width * depth_texel_size);
		}
		else
		{
			const auto packed_zeta_pitch = (layout.width * depth_texel_size);
			if (packed_zeta_pitch > layout.zeta_pitch)
			{
				layout.width = (layout.zeta_pitch / depth_texel_size);
			}

			layout.actual_zeta_pitch = layout.zeta_pitch;
		}

		for (const auto& index : rsx::utility::get_rtt_indexes(layout.target))
		{
			if (color_buffer_unused)
			{
				layout.color_addresses[index] = 0;
				continue;
			}

			if (layout.color_pitch[index] < minimum_color_pitch)
			{
				// Unlike the depth buffer, when given a color target we know it is intended to be rendered to
				rsx_log.error("Framebuffer setup error: Color target failed pitch check, Pitch=[%d, %d, %d, %d] + %d, target=%d, context=%d",
					layout.color_pitch[0], layout.color_pitch[1], layout.color_pitch[2], layout.color_pitch[3],
					layout.zeta_pitch, static_cast<u32>(layout.target), static_cast<u32>(context));

				// Do not remove this buffer for now as it implies something went horribly wrong anyway
break ;
2018-07-23 23:55:15 +02:00
}
2018-12-12 09:58:44 +01:00
if ( layout . color_addresses [ index ] = = layout . zeta_address )
2018-07-23 23:55:15 +02:00
{
2020-02-01 09:07:25 +01:00
rsx_log . warning ( " Framebuffer at 0x%X has aliasing color/depth targets, color_index=%d, zeta_pitch = %d, color_pitch=%d, context=%d " ,
2019-12-03 23:34:23 +01:00
layout . zeta_address , index , layout . zeta_pitch , layout . color_pitch [ index ] , static_cast < u32 > ( context ) ) ;
2018-07-23 23:55:15 +02:00
2018-12-12 09:58:44 +01:00
m_framebuffer_state_contested = true ;
2018-07-23 23:55:15 +02:00
// TODO: Research clearing both depth AND color
// TODO: If context is creation_draw, deal with possibility of a lost buffer clear
2019-08-27 13:55:45 +02:00
if ( depth_test_enabled | | stencil_test_enabled | | ( ! layout . color_write_enabled [ index ] & & layout . zeta_write_enabled ) )
2018-07-23 23:55:15 +02:00
{
// Use address for depth data
layout . color_addresses [ index ] = 0 ;
2018-12-12 09:58:44 +01:00
continue ;
2018-07-23 23:55:15 +02:00
}
else
{
// Use address for color data
layout . zeta_address = 0 ;
}
}
2020-12-09 08:47:45 +01:00
ensure ( layout . color_addresses [ index ] ) ;
2018-12-12 09:58:44 +01:00
2019-07-09 20:32:21 +02:00
const auto packed_pitch = ( layout . width * color_texel_size ) ;
if ( packed_render )
{
layout . actual_color_pitch [ index ] = packed_pitch ;
}
else
{
if ( packed_pitch > layout . color_pitch [ index ] )
{
layout . width = ( layout . color_pitch [ index ] / color_texel_size ) ;
}
layout . actual_color_pitch [ index ] = layout . color_pitch [ index ] ;
}
2018-12-12 09:58:44 +01:00
framebuffer_status_valid = true ;
2018-07-23 23:55:15 +02:00
}
if ( ! framebuffer_status_valid & & ! layout . zeta_address )
{
2020-02-01 09:07:25 +01:00
rsx_log . warning ( " Framebuffer setup failed. Draw calls may have been lost " ) ;
2019-08-27 13:55:45 +02:00
return ;
2018-07-23 23:55:15 +02:00
}
// At least one attachment exists
framebuffer_status_valid = true ;
// Window (raster) offsets
const auto window_offset_x = rsx : : method_registers . window_offset_x ( ) ;
const auto window_offset_y = rsx : : method_registers . window_offset_y ( ) ;
const auto window_clip_width = rsx : : method_registers . window_clip_horizontal ( ) ;
const auto window_clip_height = rsx : : method_registers . window_clip_vertical ( ) ;
if ( window_offset_x | | window_offset_y )
{
// Window offset is what affects the raster position!
// Tested with Turbo: Super stunt squad that only changes the window offset to declare new framebuffers
// Sampling behavior clearly indicates the addresses are expected to have changed
if ( auto clip_type = rsx : : method_registers . window_clip_type ( ) )
2020-12-09 16:04:52 +01:00
rsx_log . error ( " Unknown window clip type 0x%X " , clip_type ) ;
2018-07-23 23:55:15 +02:00
for ( const auto & index : rsx : : utility : : get_rtt_indexes ( layout . target ) )
{
if ( layout . color_addresses [ index ] )
{
2018-12-28 18:09:55 +01:00
const u32 window_offset_bytes = ( layout . actual_color_pitch [ index ] * window_offset_y ) + ( color_texel_size * window_offset_x ) ;
2018-07-23 23:55:15 +02:00
layout . color_addresses [ index ] + = window_offset_bytes ;
}
}
if ( layout . zeta_address )
{
2018-12-28 18:09:55 +01:00
layout . zeta_address + = ( layout . actual_zeta_pitch * window_offset_y ) + ( depth_texel_size * window_offset_x ) ;
2018-07-23 23:55:15 +02:00
}
}
if ( ( window_clip_width & & window_clip_width < layout . width ) | |
( window_clip_height & & window_clip_height < layout . height ) )
{
2020-02-01 09:07:25 +01:00
rsx_log . error ( " Unexpected window clip dimensions: window_clip=%dx%d, surface_clip=%dx%d " ,
2018-07-23 23:55:15 +02:00
window_clip_width , window_clip_height , layout . width , layout . height ) ;
}
layout . aa_mode = aa_mode ;
layout . aa_factors [ 0 ] = aa_factor_u ;
layout . aa_factors [ 1 ] = aa_factor_v ;
bool really_changed = false ;
for ( u8 i = 0 ; i < rsx : : limits : : color_buffers_count ; + + i )
{
if ( m_surface_info [ i ] . address ! = layout . color_addresses [ i ] )
{
really_changed = true ;
break ;
}
if ( layout . color_addresses [ i ] )
{
if ( m_surface_info [ i ] . width ! = layout . width | |
2019-05-20 16:14:02 +02:00
m_surface_info [ i ] . height ! = layout . height | |
2019-08-14 00:38:31 +02:00
m_surface_info [ i ] . color_format ! = layout . color_format | |
2019-05-20 16:14:02 +02:00
m_surface_info [ i ] . samples ! = sample_count )
2018-07-23 23:55:15 +02:00
{
really_changed = true ;
break ;
}
}
}
if ( ! really_changed )
{
2019-05-20 16:14:02 +02:00
if ( layout . zeta_address = = m_depth_surface_info . address & &
2019-08-14 00:38:31 +02:00
layout . depth_format = = m_depth_surface_info . depth_format & &
2019-05-20 16:14:02 +02:00
sample_count = = m_depth_surface_info . samples )
2018-07-23 23:55:15 +02:00
{
// Same target is reused
2019-08-27 13:55:45 +02:00
return ;
2018-07-23 23:55:15 +02:00
}
}
layout . ignore_change = false ;
}
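// Lightweight re-evaluation path for single register writes that affect the framebuffer.
// Instead of rebuilding the whole layout, it only re-checks the affected write masks and
// flags m_rtts_dirty when a contested attachment changes usage.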
void thread::on_framebuffer_options_changed(u32 opt)
{
	if (m_rtts_dirty)
	{
		// Nothing to do
		return;
	}

	auto evaluate_depth_buffer_state = [&]()
	{
		m_framebuffer_layout.zeta_write_enabled =
			(rsx::method_registers.depth_test_enabled() && rsx::method_registers.depth_write_enabled());
	};

	auto evaluate_stencil_buffer_state = [&]()
	{
		if (!m_framebuffer_layout.zeta_write_enabled &&
			rsx::method_registers.stencil_test_enabled() &&
			is_depth_stencil_format(m_framebuffer_layout.depth_format))
		{
			// Check if stencil data is modified
			auto mask = rsx::method_registers.stencil_mask();
			bool active_write_op = (rsx::method_registers.stencil_op_zpass() != rsx::stencil_op::keep ||
				rsx::method_registers.stencil_op_fail() != rsx::stencil_op::keep ||
				rsx::method_registers.stencil_op_zfail() != rsx::stencil_op::keep);

			if ((!mask || !active_write_op) && rsx::method_registers.two_sided_stencil_test_enabled())
			{
				mask |= rsx::method_registers.back_stencil_mask();
				active_write_op |= (rsx::method_registers.back_stencil_op_zpass() != rsx::stencil_op::keep ||
					rsx::method_registers.back_stencil_op_fail() != rsx::stencil_op::keep ||
					rsx::method_registers.back_stencil_op_zfail() != rsx::stencil_op::keep);
			}

			m_framebuffer_layout.zeta_write_enabled = (mask && active_write_op);
		}
	};

	auto evaluate_color_buffer_state = [&]() -> bool
	{
		const auto mrt_buffers = rsx::utility::get_rtt_indexes(m_framebuffer_layout.target);
		bool any_found = false;

		for (uint i = 0; i < mrt_buffers.size(); ++i)
		{
			if (rsx::method_registers.color_write_enabled(i))
			{
				const auto real_index = mrt_buffers[i];
				m_framebuffer_layout.color_write_enabled[real_index] = true;
				any_found = true;
			}
		}

		return any_found;
	};

	auto evaluate_depth_buffer_contested = [&]()
	{
		if (m_framebuffer_layout.zeta_address) [[likely]]
		{
			// Nothing to do, depth buffer already exists
			return false;
		}

		// Check if depth read/write is enabled
		if (m_framebuffer_layout.zeta_write_enabled ||
			rsx::method_registers.depth_test_enabled())
		{
			return true;
		}

		// Check if stencil read is enabled
		if (is_depth_stencil_format(m_framebuffer_layout.depth_format) &&
			rsx::method_registers.stencil_test_enabled())
		{
			return true;
		}

		return false;
	};

	switch (opt)
	{
	case NV4097_SET_DEPTH_TEST_ENABLE:
	case NV4097_SET_DEPTH_MASK:
	case NV4097_SET_DEPTH_FUNC:
	{
		evaluate_depth_buffer_state();

		if (m_framebuffer_state_contested)
		{
			m_rtts_dirty |= evaluate_depth_buffer_contested();
		}
		break;
	}
	case NV4097_SET_TWO_SIDED_STENCIL_TEST_ENABLE:
	case NV4097_SET_STENCIL_TEST_ENABLE:
	case NV4097_SET_STENCIL_MASK:
	case NV4097_SET_STENCIL_OP_ZPASS:
	case NV4097_SET_STENCIL_OP_FAIL:
	case NV4097_SET_STENCIL_OP_ZFAIL:
	case NV4097_SET_BACK_STENCIL_MASK:
	case NV4097_SET_BACK_STENCIL_OP_ZPASS:
	case NV4097_SET_BACK_STENCIL_OP_FAIL:
	case NV4097_SET_BACK_STENCIL_OP_ZFAIL:
	{
		// Stencil takes a back seat to depth buffer stuff
		evaluate_depth_buffer_state();

		if (!m_framebuffer_layout.zeta_write_enabled)
		{
			evaluate_stencil_buffer_state();
		}

		if (m_framebuffer_state_contested)
		{
			m_rtts_dirty |= evaluate_depth_buffer_contested();
		}
		break;
	}
	case NV4097_SET_COLOR_MASK:
	case NV4097_SET_COLOR_MASK_MRT:
	{
		if (!m_framebuffer_state_contested) [[likely]]
		{
			// Update write masks and continue
			evaluate_color_buffer_state();
		}
		else
		{
			bool old_state = false;
			for (const auto& enabled : m_framebuffer_layout.color_write_enabled)
			{
				if (old_state = enabled; old_state) break;
			}

			const auto new_state = evaluate_color_buffer_state();
			if (!old_state && new_state)
			{
				// Color buffers now in use
				m_rtts_dirty = true;
			}
		}
		break;
	}
	default:
		rsx_log.fatal("Unhandled framebuffer option changed 0x%x", opt);
	}
}
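// Computes the effective scissor region, optionally clipped against the viewport.
// Returns true only if the cached region had to be rebuilt; a degenerate or fully
// offscreen region invalidates the framebuffer setup instead.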
bool thread::get_scissor(areau& region, bool clip_viewport)
{
	if (!(m_graphics_state & rsx::pipeline_state::scissor_config_state_dirty))
	{
		if (clip_viewport == !!(m_graphics_state & rsx::pipeline_state::scissor_setup_clipped))
		{
			// Nothing to do
			return false;
		}
	}

	m_graphics_state &= ~(rsx::pipeline_state::scissor_config_state_dirty | rsx::pipeline_state::scissor_setup_clipped);

	u16 x1, x2, y1, y2;

	u16 scissor_x = rsx::method_registers.scissor_origin_x();
	u16 scissor_w = rsx::method_registers.scissor_width();
	u16 scissor_y = rsx::method_registers.scissor_origin_y();
	u16 scissor_h = rsx::method_registers.scissor_height();

	if (clip_viewport)
	{
		u16 raster_x = rsx::method_registers.viewport_origin_x();
		u16 raster_w = rsx::method_registers.viewport_width();
		u16 raster_y = rsx::method_registers.viewport_origin_y();
		u16 raster_h = rsx::method_registers.viewport_height();

		// Get the intersection of the two areas
		x1 = std::max(scissor_x, raster_x);
		y1 = std::max(scissor_y, raster_y);
		x2 = std::min(scissor_x + scissor_w, raster_x + raster_w);
		y2 = std::min(scissor_y + scissor_h, raster_y + raster_h);

		m_graphics_state |= rsx::pipeline_state::scissor_setup_clipped;
	}
	else
	{
		x1 = scissor_x;
		x2 = scissor_x + scissor_w;
		y1 = scissor_y;
		y2 = scissor_y + scissor_h;
	}

	if (x2 <= x1 ||
		y2 <= y1 ||
		x1 >= rsx::method_registers.window_clip_horizontal() ||
		y1 >= rsx::method_registers.window_clip_vertical())
	{
		m_graphics_state |= rsx::pipeline_state::scissor_setup_invalid;
		framebuffer_status_valid = false;
		return false;
	}

	if (m_graphics_state & rsx::pipeline_state::scissor_setup_invalid)
	{
		m_graphics_state &= ~rsx::pipeline_state::scissor_setup_invalid;
		framebuffer_status_valid = true;
	}

	std::tie(region.x1, region.y1) = rsx::apply_resolution_scale<false>(x1, y1, m_framebuffer_layout.width, m_framebuffer_layout.height);
	std::tie(region.x2, region.y2) = rsx::apply_resolution_scale<true>(x2, y2, m_framebuffer_layout.width, m_framebuffer_layout.height);

	return true;
}
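// Early analysis of the bound fragment program ucode. Caches the program metadata
// (start offset, ucode length, referenced texture mask) so later stages can validate
// texture state without re-parsing the ucode.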
void thread::prefetch_fragment_program()
{
	if (!(m_graphics_state & rsx::pipeline_state::fragment_program_ucode_dirty))
		return;

	m_graphics_state &= ~rsx::pipeline_state::fragment_program_ucode_dirty;

	// Request an update of the fragment constants if the program block is invalidated
	m_graphics_state |= rsx::pipeline_state::fragment_constants_dirty;

	const auto [program_offset, program_location] = method_registers.shader_program_address();
	const auto prev_textures_reference_mask = current_fp_metadata.referenced_textures_mask;

	auto data_ptr = vm::base(rsx::get_address(program_offset, program_location));
	current_fp_metadata = program_hash_util::fragment_program_utils::analyse_fragment_program(data_ptr);

	current_fragment_program.data = (static_cast<u8*>(data_ptr) + current_fp_metadata.program_start_offset);
	current_fragment_program.offset = program_offset + current_fp_metadata.program_start_offset;
	current_fragment_program.ucode_length = current_fp_metadata.program_ucode_length;
	current_fragment_program.total_length = current_fp_metadata.program_ucode_length + current_fp_metadata.program_start_offset;
	current_fragment_program.texture_state.import(current_fp_texture_state, current_fp_metadata.referenced_textures_mask);
	current_fragment_program.valid = true;

	if (!(m_graphics_state & rsx::pipeline_state::fragment_program_state_dirty))
	{
		// Verify current texture state is valid
		for (u32 textures_ref = current_fp_metadata.referenced_textures_mask, i = 0; textures_ref; textures_ref >>= 1, ++i)
		{
			if (!(textures_ref & 1)) continue;

			if (m_textures_dirty[i])
			{
				m_graphics_state |= rsx::pipeline_state::fragment_program_state_dirty;
				break;
			}
		}
	}

	if (!(m_graphics_state & rsx::pipeline_state::fragment_program_state_dirty) &&
		(prev_textures_reference_mask != current_fp_metadata.referenced_textures_mask))
	{
		// If different textures are used, upload their coefficients.
		// The texture parameters transfer routine is optimized and only writes data for textures consumed by the ucode.
		m_graphics_state |= rsx::pipeline_state::fragment_texture_state_dirty;
	}
}
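// Vertex-stage counterpart of prefetch_fragment_program: re-analyses the transform
// program and rebuilds its jump table whenever the ucode block is marked dirty.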
void thread::prefetch_vertex_program()
{
	if (!(m_graphics_state & rsx::pipeline_state::vertex_program_ucode_dirty))
		return;

	m_graphics_state &= ~rsx::pipeline_state::vertex_program_ucode_dirty;

	// Reload transform constants unconditionally for now
	m_graphics_state |= rsx::pipeline_state::transform_constants_dirty;

	const u32 transform_program_start = rsx::method_registers.transform_program_start();
	current_vertex_program.data.reserve(512 * 4);
	current_vertex_program.jump_table.clear();

	current_vp_metadata = program_hash_util::vertex_program_utils::analyse_vertex_program
	(
		method_registers.transform_program.data(),  // Input raw block
		transform_program_start,                    // Address of entry point
		current_vertex_program                      // [out] Program object
	);

	current_vertex_program.texture_state.import(current_vp_texture_state, current_vp_metadata.referenced_textures_mask);

	if (!(m_graphics_state & rsx::pipeline_state::vertex_program_state_dirty))
	{
		// Verify current texture state is valid
		for (u32 textures_ref = current_vp_metadata.referenced_textures_mask, i = 0; textures_ref; textures_ref >>= 1, ++i)
		{
			if (!(textures_ref & 1)) continue;

			if (m_vertex_textures_dirty[i])
			{
				m_graphics_state |= rsx::pipeline_state::vertex_program_state_dirty;
				break;
			}
		}
	}
}

void thread::analyse_current_rsx_pipeline()
{
	prefetch_vertex_program();
	prefetch_fragment_program();
}
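// Finalizes the per-draw vertex program state: refreshes the attribute output mask
// and records dimension/MSAA hints for every vertex texture the ucode references.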
void thread::get_current_vertex_program(const std::array<std::unique_ptr<rsx::sampled_image_descriptor_base>, rsx::limits::vertex_textures_count>& sampler_descriptors)
{
	if (!(m_graphics_state & rsx::pipeline_state::vertex_program_dirty))
		return;

	ensure(!(m_graphics_state & rsx::pipeline_state::vertex_program_ucode_dirty));
	current_vertex_program.output_mask = rsx::method_registers.vertex_attrib_output_mask();

	for (u32 textures_ref = current_vp_metadata.referenced_textures_mask, i = 0; textures_ref; textures_ref >>= 1, ++i)
	{
		if (!(textures_ref & 1)) continue;

		const auto& tex = rsx::method_registers.vertex_textures[i];
		if (tex.enabled() && (current_vp_metadata.referenced_textures_mask & (1 << i)))
		{
			current_vp_texture_state.clear(i);
			current_vp_texture_state.set_dimension(sampler_descriptors[i]->image_type, i);

			if (backend_config.supports_hw_msaa &&
				sampler_descriptors[i]->samples > 1)
			{
				current_vp_texture_state.multisampled_textures |= (1 << i);
			}
		}
	}

	current_vertex_program.texture_state.import(current_vp_texture_state, current_vp_metadata.referenced_textures_mask);
}
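// Groups enabled vertex attributes into interleaved memory blocks so each physical
// range is uploaded only once. Attributes fed from immediate-mode push buffers or
// fixed-function registers are routed to transient (volatile) storage instead.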
void thread::analyse_inputs_interleaved(vertex_input_layout& result)
{
	const rsx_state& state = rsx::method_registers;
	const u32 input_mask = state.vertex_attrib_input_mask() & current_vp_metadata.referenced_inputs_mask;

	result.clear();

	if (state.current_draw_clause.command == rsx::draw_command::inlined_array)
	{
		interleaved_range_info& info = *result.alloc_interleaved_block();
		info.interleaved = true;

		for (u8 index = 0; index < rsx::limits::vertex_count; ++index)
		{
			auto& vinfo = state.vertex_arrays_info[index];

			if (vinfo.size() > 0)
			{
				// Stride must be updated even if the stream is disabled
				info.attribute_stride += rsx::get_vertex_type_size_on_host(vinfo.type(), vinfo.size());
				info.locations.push_back({ index, false, 1 });

				if (input_mask & (1u << index))
				{
					result.attribute_placement[index] = attribute_buffer_placement::transient;
				}
			}
			else if (state.register_vertex_info[index].size > 0 && input_mask & (1u << index))
			{
				// Reads from register
				result.referenced_registers.push_back(index);
				result.attribute_placement[index] = attribute_buffer_placement::transient;
			}
		}

		if (info.attribute_stride)
		{
			// At least one array feed must be enabled for vertex input
			result.interleaved_blocks.push_back(&info);
		}

		return;
	}

	const u32 frequency_divider_mask = rsx::method_registers.frequency_divider_operation_mask();
	result.interleaved_blocks.reserve(16);
	result.referenced_registers.reserve(16);

	for (auto [ref_mask, index] = std::tuple{ input_mask, u8(0) }; ref_mask; ++index, ref_mask >>= 1)
	{
		ensure(index < rsx::limits::vertex_count);

		if (!(ref_mask & 1u))
		{
			// Nothing to do, uninitialized
			continue;
		}

		// Check for interleaving
		const auto& info = state.vertex_arrays_info[index];

		if (rsx::method_registers.current_draw_clause.is_immediate_draw &&
			rsx::method_registers.current_draw_clause.command != rsx::draw_command::indexed)
		{
			// NOTE: In immediate rendering mode, all vertex setup is ignored
			// Observed with GT5, immediate render bypasses array pointers completely, even falling back to fixed-function register defaults
			if (vertex_push_buffers[index].vertex_count > 1)
			{
				// Ensure consistent number of vertices per attribute.
				vertex_push_buffers[index].pad_to(vertex_push_buffers[0].vertex_count, false);

				// Read temp buffer (register array)
				std::pair<u8, u32> volatile_range_info = std::make_pair(index, static_cast<u32>(vertex_push_buffers[index].data.size() * sizeof(u32)));
				result.volatile_blocks.push_back(volatile_range_info);
				result.attribute_placement[index] = attribute_buffer_placement::transient;
			}
			else if (state.register_vertex_info[index].size > 0)
			{
				// Reads from register
				result.referenced_registers.push_back(index);
				result.attribute_placement[index] = attribute_buffer_placement::transient;
			}

			// Fall back to the default register value if no source is specified via register
			continue;
		}

		if (!info.size())
		{
			if (state.register_vertex_info[index].size > 0)
			{
				// Reads from register
				result.referenced_registers.push_back(index);
				result.attribute_placement[index] = attribute_buffer_placement::transient;
				continue;
			}
		}
		else
		{
			result.attribute_placement[index] = attribute_buffer_placement::persistent;
			const u32 base_address = info.offset() & 0x7fffffff;
			bool alloc_new_block = true;
			bool modulo = !!(frequency_divider_mask & (1 << index));

			for (auto& block : result.interleaved_blocks)
			{
				if (block->single_vertex)
				{
					// Single vertex definition, continue
					continue;
				}

				if (block->attribute_stride != info.stride())
				{
					// Stride does not match, continue
					continue;
				}

				if (base_address > block->base_offset)
				{
					const u32 diff = base_address - block->base_offset;
					if (diff > info.stride())
					{
						// Not interleaved, continue
						continue;
					}
				}
				else
				{
					const u32 diff = block->base_offset - base_address;
					if (diff > info.stride())
					{
						// Not interleaved, continue
						continue;
					}

					// Matches, and this address is lower than existing
					block->base_offset = base_address;
				}

				alloc_new_block = false;
				block->locations.push_back({ index, modulo, info.frequency() });
				block->interleaved = true;
				break;
			}

			if (alloc_new_block)
			{
				interleaved_range_info& block = *result.alloc_interleaved_block();
				block.base_offset = base_address;
				block.attribute_stride = info.stride();
				block.memory_location = info.offset() >> 31;
				block.locations.reserve(16);
				block.locations.push_back({ index, modulo, info.frequency() });

				if (block.attribute_stride == 0)
				{
					block.single_vertex = true;
					block.attribute_stride = rsx::get_vertex_type_size_on_host(info.type(), info.size());
				}

				result.interleaved_blocks.push_back(&block);
			}
		}
	}

	for (auto& info : result.interleaved_blocks)
	{
		// Calculate the real data address to be used during upload
		info->real_offset_address = rsx::get_address(rsx::get_vertex_offset_from_base(state.vertex_data_base_offset(), info->base_offset), info->memory_location);
	}
}
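// Builds the per-draw fragment program descriptor: shader control flags, texcoord
// and point-sprite masks, plus per-texture sampling corrections (scale, shadow
// compare, renormalization, gamma/sign expansion) that the recompiled shader applies.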
void thread::get_current_fragment_program(const std::array<std::unique_ptr<rsx::sampled_image_descriptor_base>, rsx::limits::fragment_textures_count>& sampler_descriptors)
{
	if (!(m_graphics_state & rsx::pipeline_state::fragment_program_dirty))
		return;

	ensure(!(m_graphics_state & rsx::pipeline_state::fragment_program_ucode_dirty));

	m_graphics_state &= ~(rsx::pipeline_state::fragment_program_dirty);

	current_fragment_program.ctrl = rsx::method_registers.shader_control() & (CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS | CELL_GCM_SHADER_CONTROL_DEPTH_EXPORT);
	current_fragment_program.texcoord_control_mask = rsx::method_registers.texcoord_control_mask();
	current_fragment_program.two_sided_lighting = rsx::method_registers.two_side_light_en();

	if (method_registers.current_draw_clause.primitive == primitive_type::points &&
		method_registers.point_sprite_enabled())
	{
		// Set the high word of the control mask to store point sprite control
		current_fragment_program.texcoord_control_mask |= u32(method_registers.point_sprite_control_mask()) << 16;
	}

	for (u32 textures_ref = current_fp_metadata.referenced_textures_mask, i = 0; textures_ref; textures_ref >>= 1, ++i)
	{
		if (!(textures_ref & 1)) continue;

		auto& tex = rsx::method_registers.fragment_textures[i];
		current_fp_texture_state.clear(i);

		if (tex.enabled() && sampler_descriptors[i]->format_class != RSX_FORMAT_CLASS_UNDEFINED)
		{
			current_fragment_program.texture_params[i].scale[0] = sampler_descriptors[i]->scale_x;
			current_fragment_program.texture_params[i].scale[1] = sampler_descriptors[i]->scale_y;
			current_fragment_program.texture_params[i].scale[2] = sampler_descriptors[i]->scale_z;
			current_fragment_program.texture_params[i].subpixel_bias = 0.f;
			current_fragment_program.texture_params[i].remap = tex.remap();

			m_graphics_state |= rsx::pipeline_state::fragment_texture_state_dirty;

			u32 texture_control = 0;
			current_fp_texture_state.set_dimension(sampler_descriptors[i]->image_type, i);

			if (tex.alpha_kill_enabled())
			{
				// Alphakill can be ignored unless a valid comparison function is set
				texture_control |= (1 << texture_control_bits::ALPHAKILL);
			}

			//const u32 texaddr = rsx::get_address(tex.offset(), tex.location());
			const u32 raw_format = tex.format();
			const u32 format = raw_format & ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN);

			if (raw_format & CELL_GCM_TEXTURE_UN)
			{
				if (tex.min_filter() == rsx::texture_minify_filter::nearest ||
					tex.mag_filter() == rsx::texture_magnify_filter::nearest)
				{
					// Subpixel offset so that (X + bias) * scale will round correctly.
					// This is done to work around fdiv precision issues in some GPUs (NVIDIA)
					current_fragment_program.texture_params[i].subpixel_bias = 0.01f;
				}
			}

			if (backend_config.supports_hw_msaa &&
				sampler_descriptors[i]->samples > 1)
			{
				current_fp_texture_state.multisampled_textures |= (1 << i);
				texture_control |= (static_cast<u32>(tex.zfunc()) << texture_control_bits::DEPTH_COMPARE_OP);
				texture_control |= (static_cast<u32>(tex.mag_filter() != rsx::texture_magnify_filter::nearest) << texture_control_bits::FILTERED_MAG);
				texture_control |= (static_cast<u32>(tex.min_filter() != rsx::texture_minify_filter::nearest) << texture_control_bits::FILTERED_MIN);
				texture_control |= (((tex.format() & CELL_GCM_TEXTURE_UN) >> 6) << texture_control_bits::UNNORMALIZED_COORDS);
			}

			if (sampler_descriptors[i]->format_class != RSX_FORMAT_CLASS_COLOR)
			{
				switch (sampler_descriptors[i]->format_class)
				{
				case RSX_FORMAT_CLASS_DEPTH16_FLOAT:
				case RSX_FORMAT_CLASS_DEPTH24_FLOAT_X8_PACK32:
					texture_control |= (1 << texture_control_bits::DEPTH_FLOAT);
					break;
				default:
					break;
				}

				switch (format)
				{
				case CELL_GCM_TEXTURE_A8R8G8B8:
				case CELL_GCM_TEXTURE_D8R8G8B8:
				{
					// Emulate bitcast in shader
					current_fp_texture_state.redirected_textures |= (1 << i);
					const auto float_en = (sampler_descriptors[i]->format_class == RSX_FORMAT_CLASS_DEPTH24_FLOAT_X8_PACK32) ? 1 : 0;
					texture_control |= (float_en << texture_control_bits::DEPTH_FLOAT);
					break;
				}
				case CELL_GCM_TEXTURE_X16:
				{
					// A simple way to quickly read DEPTH16 data without shadow comparison
					break;
				}
				case CELL_GCM_TEXTURE_DEPTH16:
				case CELL_GCM_TEXTURE_DEPTH24_D8:
				case CELL_GCM_TEXTURE_DEPTH16_FLOAT:
				case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT:
				{
					// Natively supported Z formats with shadow comparison feature
					const auto compare_mode = tex.zfunc();
					if (!tex.alpha_kill_enabled() &&
						compare_mode < rsx::comparison_function::always &&
						compare_mode > rsx::comparison_function::never)
					{
						current_fp_texture_state.shadow_textures |= (1 << i);
					}
					break;
				}
				default:
					rsx_log.error("Depth texture bound to pipeline with unexpected format 0x%X", format);
				}
			}
			else if (!backend_config.supports_hw_renormalization)
			{
				switch (format)
				{
				case CELL_GCM_TEXTURE_A1R5G5B5:
				case CELL_GCM_TEXTURE_A4R4G4B4:
				case CELL_GCM_TEXTURE_D1R5G5B5:
				case CELL_GCM_TEXTURE_R5G5B5A1:
				case CELL_GCM_TEXTURE_R5G6B5:
				case CELL_GCM_TEXTURE_R6G5B5:
					texture_control |= (1 << texture_control_bits::RENORMALIZE);
					break;
				default:
					break;
				}
			}

			// Special operations applied to 8-bit formats such as gamma correction and sign conversion
			// NOTE: The unsigned_remap being set to anything other than 0 flags the texture as being signed (UE3)
			// This is a separate method of setting the format to signed mode without doing so per-channel
			// Precedence = SIGNED override > GAMMA > UNSIGNED_REMAP (See Resistance 3 for GAMMA/REMAP relationship, UE3 for REMAP effect)
			const u32 argb8_signed = tex.argb_signed();
			const u32 gamma = tex.gamma() & ~argb8_signed;
			const u32 unsigned_remap = (tex.unsigned_remap() == CELL_GCM_TEXTURE_UNSIGNED_REMAP_NORMAL) ? 0u : (~gamma & 0xF);
			u32 argb8_convert = gamma;

			if (const u32 sign_convert = (argb8_signed | unsigned_remap))
			{
				// Apply remap to avoid mapping 1 to -1. Only the sign conversion needs this check
				// TODO: Use actual remap mask to account for 0 and 1 overrides in default mapping
				// TODO: Replace this clusterfuck of texture control with matrix transformation
				const auto remap_ctrl = (tex.remap() >> 8) & 0xAA;
				if (remap_ctrl == 0xAA)
				{
					argb8_convert |= (sign_convert & 0xFu) << texture_control_bits::EXPAND_OFFSET;
				}
				else
				{
					if (remap_ctrl & 0x03) argb8_convert |= (sign_convert & 0x1u) << texture_control_bits::EXPAND_OFFSET;
					if (remap_ctrl & 0x0C) argb8_convert |= (sign_convert & 0x2u) << texture_control_bits::EXPAND_OFFSET;
					if (remap_ctrl & 0x30) argb8_convert |= (sign_convert & 0x4u) << texture_control_bits::EXPAND_OFFSET;
					if (remap_ctrl & 0xC0) argb8_convert |= (sign_convert & 0x8u) << texture_control_bits::EXPAND_OFFSET;
				}
			}

			if (argb8_convert)
			{
				switch (format)
				{
				case CELL_GCM_TEXTURE_DEPTH24_D8:
				case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT:
				case CELL_GCM_TEXTURE_DEPTH16:
				case CELL_GCM_TEXTURE_DEPTH16_FLOAT:
				case CELL_GCM_TEXTURE_X16:
				case CELL_GCM_TEXTURE_Y16_X16:
				case CELL_GCM_TEXTURE_COMPRESSED_HILO8:
				case CELL_GCM_TEXTURE_COMPRESSED_HILO_S8:
				case CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT:
				case CELL_GCM_TEXTURE_W32_Z32_Y32_X32_FLOAT:
				case CELL_GCM_TEXTURE_X32_FLOAT:
				case CELL_GCM_TEXTURE_Y16_X16_FLOAT:
					// Special data formats (XY, HILO, DEPTH) are not RGB formats
					// Ignore gamma flags
					break;
				default:
					texture_control |= argb8_convert;
					break;
				}
			}

			current_fragment_program.texture_params[i].control = texture_control;
		}
	}

	// Update texture configuration
	current_fragment_program.texture_state.import(current_fp_texture_state, current_fp_metadata.referenced_textures_mask);

	// Sanity checks
	if (current_fragment_program.ctrl & CELL_GCM_SHADER_CONTROL_DEPTH_EXPORT)
	{
		// Check that the depth stage is not disabled
		if (!rsx::method_registers.depth_test_enabled())
		{
			rsx_log.trace("FS exports depth component but depth test is disabled (INVALID_OPERATION)");
		}
	}
}
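// Returns true (and marks the fragment ucode dirty) when a DMA write overlaps the
// currently bound shader program block.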
bool thread::invalidate_fragment_program(u32 dst_dma, u32 dst_offset, u32 size)
{
	const auto [shader_offset, shader_dma] = rsx::method_registers.shader_program_address();

	if ((dst_dma & CELL_GCM_LOCATION_MAIN) == shader_dma &&
		address_range::start_length(shader_offset, current_fragment_program.total_length).overlaps(
			address_range::start_length(dst_offset, size))) [[unlikely]]
	{
		// Data overlaps
		m_graphics_state |= rsx::pipeline_state::fragment_program_ucode_dirty;
		return true;
	}

	return false;
}
void thread::reset()
{
	rsx::method_registers.reset();
	check_zcull_status(false);
	nv4097::set_render_mode(this, 0, method_registers.registers[NV4097_SET_RENDER_ENABLE]);

	m_graphics_state = pipeline_state::all_dirty;
	m_rtts_dirty = true;
	m_framebuffer_state_contested = false;
}

void thread::init(u32 ctrlAddress)
{
	dma_address = ctrlAddress;
	ctrl = vm::_ptr<RsxDmaControl>(ctrlAddress);
	flip_status = CELL_GCM_DISPLAY_FLIP_STATUS_DONE;

	vm::write32(device_addr + 0x30, 1);
	std::memset(display_buffers, 0, sizeof(display_buffers));

	m_rsx_thread_exiting = false;
}
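// Returns {persistent, volatile} byte counts needed to upload vertex data for the
// current draw call: persistent covers interleaved array ranges, volatile covers
// push buffers and register-sourced attributes.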
std::pair<u32, u32> thread::calculate_memory_requirements(const vertex_input_layout& layout, u32 first_vertex, u32 vertex_count)
{
	u32 persistent_memory_size = 0;
	u32 volatile_memory_size = 0;

	volatile_memory_size += ::size32(layout.referenced_registers) * 16u;

	if (rsx::method_registers.current_draw_clause.command == rsx::draw_command::inlined_array)
	{
		for (const auto& block : layout.interleaved_blocks)
		{
			volatile_memory_size += block->attribute_stride * vertex_count;
		}
	}
	else
	{
		// NOTE: Immediate commands can be index array only or both index array and vertex data
		// Check both - but only check volatile blocks if immediate_draw flag is set
		if (rsx::method_registers.current_draw_clause.is_immediate_draw)
		{
			for (const auto& info : layout.volatile_blocks)
			{
				volatile_memory_size += info.second;
			}
		}

		persistent_memory_size = layout.calculate_interleaved_memory_requirements(first_vertex, vertex_count);
	}

	return std::make_pair(persistent_memory_size, volatile_memory_size);
}
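// Emits one 64-bit descriptor per referenced attribute (stride, divisor, type, size,
// start offset, storage flags) into the buffer consumed by the vertex fetch logic.
// Offsets are resolved against the persistent/volatile base offsets supplied by the caller.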
void thread::fill_vertex_layout_state(const vertex_input_layout& layout, u32 first_vertex, u32 vertex_count, s32* buffer, u32 persistent_offset_base, u32 volatile_offset_base)
{
	std::array<s32, 16> offset_in_block = {};
	u32 volatile_offset = volatile_offset_base;
	u32 persistent_offset = persistent_offset_base;

	// NOTE: Order is important! Transient layout is always push_buffers followed by register data
	if (rsx::method_registers.current_draw_clause.is_immediate_draw)
	{
		for (const auto& info : layout.volatile_blocks)
		{
			offset_in_block[info.first] = volatile_offset;
			volatile_offset += info.second;
		}
	}

	for (u8 index : layout.referenced_registers)
	{
		offset_in_block[index] = volatile_offset;
		volatile_offset += 16;
	}

	if (rsx::method_registers.current_draw_clause.command == rsx::draw_command::inlined_array)
	{
		const auto& block = layout.interleaved_blocks[0];
		u32 inline_data_offset = volatile_offset;

		for (const auto& attrib : block->locations)
		{
			auto& info = rsx::method_registers.vertex_arrays_info[attrib.index];

			offset_in_block[attrib.index] = inline_data_offset;
			inline_data_offset += rsx::get_vertex_type_size_on_host(info.type(), info.size());
		}
	}
	else
	{
		for (const auto& block : layout.interleaved_blocks)
		{
			for (const auto& attrib : block->locations)
			{
				const u32 local_address = (rsx::method_registers.vertex_arrays_info[attrib.index].offset() & 0x7fffffff);
				offset_in_block[attrib.index] = persistent_offset + (local_address - block->base_offset);
			}

			const auto range = block->calculate_required_range(first_vertex, vertex_count);
			persistent_offset += block->attribute_stride * range.second;
		}
	}

	// Fill the data
	// Each descriptor field is 64 bits wide
	// [0-8] attribute stride
	// [8-24] attribute divisor
	// [24-27] attribute type
	// [27-30] attribute size
	// [30-31] reserved
	// [31-60] starting offset
	// [60-61] swap bytes flag
	// [61-62] volatile flag
	// [62-63] modulo enable flag
	const s32 default_frequency_mask = (1 << 8);
	const s32 swap_storage_mask = (1 << 29);
	const s32 volatile_storage_mask = (1 << 30);
	const s32 modulo_op_frequency_mask = smin;

	const u32 modulo_mask = rsx::method_registers.frequency_divider_operation_mask();
	const auto max_index = (first_vertex + vertex_count) - 1;

	for (u16 ref_mask = current_vp_metadata.referenced_inputs_mask, index = 0; ref_mask; ++index, ref_mask >>= 1)
	{
		if (!(ref_mask & 1u))
		{
			// Unused input, ignore this
			continue;
		}

		if (layout.attribute_placement[index] == attribute_buffer_placement::none)
		{
			static constexpr u64 zero = 0;
			std::memcpy(buffer + index * 2, &zero, sizeof(zero));
			continue;
		}

		rsx::vertex_base_type type = {};
		s32 size = 0;
		s32 attrib0 = 0;
		s32 attrib1 = 0;

		if (layout.attribute_placement[index] == attribute_buffer_placement::transient)
		{
			if (rsx::method_registers.current_draw_clause.command == rsx::draw_command::inlined_array)
			{
				const auto& info = rsx::method_registers.vertex_arrays_info[index];

				if (!info.size())
				{
					// Register
					const auto& reginfo = rsx::method_registers.register_vertex_info[index];
					type = reginfo.type;
					size = reginfo.size;

					attrib0 = rsx::get_vertex_type_size_on_host(type, size);
				}
				else
				{
					// Array
					type = info.type();
					size = info.size();

					attrib0 = layout.interleaved_blocks[0]->attribute_stride | default_frequency_mask;
				}
			}
			else
			{
				// Data is either from an immediate render or register input
				// Immediate data overrides register input
				if (rsx::method_registers.current_draw_clause.is_immediate_draw &&
					vertex_push_buffers[index].vertex_count > 1)
				{
					// Push buffer
					const auto& info = vertex_push_buffers[index];
					type = info.type;
					size = info.size;

					attrib0 = rsx::get_vertex_type_size_on_host(type, size) | default_frequency_mask;
				}
				else
				{
					// Register
					const auto& info = rsx::method_registers.register_vertex_info[index];
					type = info.type;
					size = info.size;

					attrib0 = rsx::get_vertex_type_size_on_host(type, size);
				}
			}

			attrib1 |= volatile_storage_mask;
		}
		else
		{
			auto& info = rsx::method_registers.vertex_arrays_info[index];
			type = info.type();
			size = info.size();

			auto stride = info.stride();
			attrib0 = stride;

			if (stride > 0) // When stride is 0, the input is not an array but a single element
			{
				const u32 frequency = info.frequency();
				switch (frequency)
				{
				case 0:
				case 1:
				{
					attrib0 |= default_frequency_mask;
					break;
				}
				default:
				{
					if (modulo_mask & (1 << index))
					{
						if (max_index >= frequency)
						{
							// Only set the modulo mask if a modulo op is actually necessary!
							// This requires that the uploaded range for this attr = [0, freq-1]
							// Ignoring the modulo op if the rendered range does not wrap allows for range optimization
							attrib0 |= (frequency << 8);
							attrib1 |= modulo_op_frequency_mask;
						}
						else
						{
							attrib0 |= default_frequency_mask;
						}
					}
					else
					{
						// Division
						attrib0 |= (frequency << 8);
					}
					break;
				}
				}
			}
		} // End of attribute placement check

		// Special compressed 4 components into one 4-byte value. Decoded as one value.
		if (type == rsx::vertex_base_type::cmp)
		{
			size = 1;
		}

		// All data is passed in PS3-native order (BE), so the swap flag should be set
		attrib1 |= swap_storage_mask;
		attrib0 |= (static_cast<s32>(type) << 24);
		attrib0 |= (size << 27);
		attrib1 |= offset_in_block[index];

		buffer[index * 2 + 0] = attrib0;
		buffer[index * 2 + 1] = attrib1;
	}
}
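// Copies the vertex sources described by the layout into the caller-provided staging
// memory: push-buffer/register data into the volatile block, interleaved array ranges
// into the persistent block (through the asynchronous DMA manager).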
void thread::write_vertex_data_to_memory(const vertex_input_layout& layout, u32 first_vertex, u32 vertex_count, void* persistent_data, void* volatile_data)
{
	auto transient = static_cast<char*>(volatile_data);
	auto persistent = static_cast<char*>(persistent_data);

	auto& draw_call = rsx::method_registers.current_draw_clause;

	if (transient != nullptr)
	{
		if (draw_call.command == rsx::draw_command::inlined_array)
		{
			for (const u8 index : layout.referenced_registers)
			{
				memcpy(transient, rsx::method_registers.register_vertex_info[index].data.data(), 16);
				transient += 16;
			}

			memcpy(transient, draw_call.inline_vertex_array.data(), draw_call.inline_vertex_array.size() * sizeof(u32));
			// Is it possible to reference data outside of the inlined array?
			return;
		}

		// NOTE: Order is important! Transient layout is always push_buffers followed by register data
		if (draw_call.is_immediate_draw)
		{
			// NOTE: It is possible for an immediate draw to only contain index data, so vertex data can be in persistent memory
			for (const auto& info : layout.volatile_blocks)
			{
				memcpy(transient, vertex_push_buffers[info.first].data.data(), info.second);
				transient += info.second;
			}
		}

		for (const u8 index : layout.referenced_registers)
		{
			memcpy(transient, rsx::method_registers.register_vertex_info[index].data.data(), 16);
			transient += 16;
		}
	}

	if (persistent != nullptr)
	{
		for (const auto& block : layout.interleaved_blocks)
		{
			auto range = block->calculate_required_range(first_vertex, vertex_count);

			const u32 data_size = range.second * block->attribute_stride;
			const u32 vertex_base = range.first * block->attribute_stride;

			g_fxo->get<rsx::dma_manager>().copy(persistent, vm::_ptr<char>(block->real_offset_address) + vertex_base, data_size);
			persistent += data_size;
		}
	}
}
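// Flip bookkeeping shared by all backends: clears any deferred flip request, updates
// the sampled frame counter and honors the pause-after-N-flips debug option.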
void thread::flip(const display_flip_info_t& info)
{
	m_eng_interrupt_mask.clear(rsx::display_interrupt);

	if (async_flip_requested & flip_request::any)
	{
		// Deferred flip
		if (info.emu_flip)
		{
			async_flip_requested.clear(flip_request::emu_requested);
		}
		else
		{
			async_flip_requested.clear(flip_request::native_ui);
		}
	}

	if (info.emu_flip)
	{
		performance_counters.sampled_frames++;

		if (m_pause_after_x_flips && m_pause_after_x_flips-- == 1)
		{
			Emu.Pause();
		}
	}

	last_host_flip_timestamp = rsx::uclock();
}
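// ZCULL reporting is only considered active when rendering is enabled and, after a
// framebuffer swap, the bound depth surface matches one of the configured ZCULL
// regions (address, depth format and antialiasing mode).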
	void thread::check_zcull_status(bool framebuffer_swap)
	{
		const bool zcull_rendering_enabled = !!method_registers.registers[NV4097_SET_ZCULL_EN];
		const bool zcull_stats_enabled = !!method_registers.registers[NV4097_SET_ZCULL_STATS_ENABLE];
		const bool zcull_pixel_cnt_enabled = !!method_registers.registers[NV4097_SET_ZPASS_PIXEL_COUNT_ENABLE];

		if (framebuffer_swap)
		{
			zcull_surface_active = false;
			const u32 zeta_address = m_depth_surface_info.address;

			if (zeta_address)
			{
				// Find zeta address in bound zculls
				for (const auto& zcull : zculls)
				{
					if (zcull.bound &&
						rsx::to_surface_depth_format(zcull.zFormat) == m_depth_surface_info.depth_format &&
						rsx::to_surface_antialiasing(zcull.aaFormat) == rsx::method_registers.surface_antialias())
					{
						const u32 rsx_address = rsx::get_address(zcull.offset, CELL_GCM_LOCATION_LOCAL);
						if (rsx_address == zeta_address)
						{
							zcull_surface_active = true;
							break;
						}
					}
				}
			}
		}

		zcull_ctrl->set_enabled(this, zcull_rendering_enabled);
		zcull_ctrl->set_status(this, zcull_surface_active, zcull_pixel_cnt_enabled, zcull_stats_enabled);
	}

	void thread::clear_zcull_stats(u32 type)
	{
		zcull_ctrl->clear(this, type);
	}

	void thread::get_zcull_stats(u32 type, vm::addr_t sink)
	{
		u32 value = 0;
		if (!g_cfg.video.disable_zcull_queries)
		{
			switch (type)
			{
			case CELL_GCM_ZPASS_PIXEL_CNT:
			case CELL_GCM_ZCULL_STATS:
			case CELL_GCM_ZCULL_STATS1:
			case CELL_GCM_ZCULL_STATS2:
			case CELL_GCM_ZCULL_STATS3:
			{
				zcull_ctrl->read_report(this, sink, type);
				return;
			}
			default:
				rsx_log.error("Unknown zcull stat type %d", type);
				break;
			}
		}
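
		// Fall through to writing a null report for disabled/unknown queries. CellGcmReportData
		// is 16 bytes (u64 timestamp, u32 value, u32 padding), so the 16-byte reservation
		// lock keeps the store atomic from the guest's perspective.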
		rsx::reservation_lock<true> lock(sink, 16);
		vm::_ref<atomic_t<CellGcmReportData>>(sink).store({timestamp(), value, 0});
	}

	u32 thread::copy_zcull_stats(u32 memory_range_start, u32 memory_range, u32 destination)
	{
		return zcull_ctrl->copy_reports_to(memory_range_start, memory_range, destination);
	}
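
	// Conditional rendering: if the report at 'ref' is still backed by in-flight occlusion
	// queries, evaluation is deferred and a sync hint nudges the backend to flush them
	// early; if the report already landed in memory, its raw zpass result decides
	// pass/fail immediately.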
	void thread::enable_conditional_rendering(vm::addr_t ref)
	{
		cond_render_ctrl.enable_conditional_render(this, ref);

		auto result = zcull_ctrl->find_query(ref, true);
		if (result.found)
		{
			if (!result.queries.empty())
			{
				cond_render_ctrl.set_eval_sources(result.queries);
				sync_hint(FIFO_hint::hint_conditional_render_eval, { .query = cond_render_ctrl.eval_sources.front(), .address = ref });
			}
			else
			{
				bool failed = (result.raw_zpass_result == 0);
				cond_render_ctrl.set_eval_result(this, failed);
			}
		}
		else
		{
			cond_render_ctrl.eval_result(this);
		}
	}

	void thread::disable_conditional_rendering()
	{
		cond_render_ctrl.disable_conditional_render(this);
	}

	void thread::begin_conditional_rendering(const std::vector<reports::occlusion_query_info*>& /*sources*/)
	{
		cond_render_ctrl.hw_cond_active = true;
		cond_render_ctrl.eval_sources.clear();
	}

	void thread::end_conditional_rendering()
	{
		cond_render_ctrl.hw_cond_active = false;
	}

	void thread::sync()
	{
		m_eng_interrupt_mask.clear(rsx::pipe_flush_interrupt);

		if (zcull_ctrl->has_pending())
		{
			zcull_ctrl->sync(this);
		}

		// Fragment constants may have been updated
		m_graphics_state |= rsx::pipeline_state::fragment_constants_dirty;

		// DMA sync; if you need this, don't use MTRSX
		// g_fxo->get<rsx::dma_manager>().sync();

		// TODO: On sync every sub-unit should finish any pending tasks
		// Might cause zcull lockup due to zombie 'unclaimed reports' which are not forcefully removed currently
		// ensure(async_tasks_pending.load() == 0);
	}

	void thread::sync_hint(FIFO_hint /*hint*/, rsx::reports::sync_hint_payload_t payload)
	{
		zcull_ctrl->on_sync_hint(payload);
	}

	bool thread::is_fifo_idle() const
	{
		return ctrl == nullptr || ctrl->get == (ctrl->put & ~3);
	}

	void thread::flush_fifo()
	{
		// Make sure GET value is exposed before sync points
		fifo_ctrl->sync_get();
		fifo_ctrl->invalidate_cache();
	}
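
	// FIFO commands are variable-length, so walking backwards directly is not possible.
	// Instead, disassemble forward from a known-good start address, record the PC of every
	// valid command, and trust the walk only if it lands exactly on 'get'.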
	std::pair<u32, u32> thread::try_get_pc_of_x_cmds_backwards(u32 count, u32 get) const
	{
		if (!ctrl)
		{
			return {0, umax};
		}

		if (!count)
		{
			return {0, get};
		}

		u32 true_get = ctrl->get;
		u32 start = last_known_code_start;

		RSXDisAsm disasm(cpu_disasm_mode::survey_cmd_size, vm::g_sudo_addr, 0, this);

		std::vector<u32> pcs_of_valid_cmds;
		pcs_of_valid_cmds.reserve(std::min<u32>((get - start) / 16, 0x4000)); // Rough estimation of final array size

		auto probe_code_region = [&](u32 probe_start) -> std::pair<u32, u32>
		{
			pcs_of_valid_cmds.clear();
			pcs_of_valid_cmds.push_back(probe_start);

			while (pcs_of_valid_cmds.back() < get)
			{
				if (u32 advance = disasm.disasm(pcs_of_valid_cmds.back()))
				{
					pcs_of_valid_cmds.push_back(pcs_of_valid_cmds.back() + advance);
				}
				else
				{
					return {0, get};
				}
			}

			if (pcs_of_valid_cmds.size() == 1u || pcs_of_valid_cmds.back() != get)
			{
				return {0, get};
			}

			u32 found_cmds_count = std::min(count, ::size32(pcs_of_valid_cmds) - 1);
			return {found_cmds_count, *(pcs_of_valid_cmds.end() - 1 - found_cmds_count)};
		};

		auto pair = probe_code_region(start);

		if (!pair.first)
		{
			pair = probe_code_region(true_get);
		}

		return pair;
	}
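
	// Recovery heuristic: every recovery is timestamped into a 20-entry history. If the
	// oldest of the last 20 recoveries is less than ~2 seconds old (a window shortened as
	// "Driver Wake-Up Delay" grows), the FIFO state is treated as unrecoverable.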
	void thread::recover_fifo(u32 line, u32 col, const char* file, const char* func)
	{
		bool kill_itself = g_cfg.core.rsx_fifo_accuracy == rsx_fifo_mode::as_ps3;

		const u64 current_time = rsx::uclock();

		if (recovered_fifo_cmds_history.size() == 20u)
		{
			const auto cmd_info = recovered_fifo_cmds_history.front();

			// Check timestamp of last tracked cmd
			// Shorten the range of forbidden difference if driver wake-up delay is used
			if (current_time - cmd_info.timestamp < 2'000'000u - std::min<u32>(g_cfg.video.driver_wakeup_delay * 700, 1'400'000))
			{
				// Probably hopeless
				kill_itself = true;
			}

			// Erase the oldest command from history, keeping the size of the queue the same
			recovered_fifo_cmds_history.pop();
		}

		if (kill_itself)
		{
			fmt::throw_exception("Dead FIFO commands queue state has been detected!"
				"\nTry increasing \"Driver Wake-Up Delay\" setting or setting \"RSX FIFO Accuracy\" to \"%s\", both in Advanced settings. Called from %s",
				std::min<rsx_fifo_mode>(rsx_fifo_mode{static_cast<u32>(g_cfg.core.rsx_fifo_accuracy.get()) + 1}, rsx_fifo_mode::atomic_ordered), src_loc{line, col, file, func});
		}

		// Error. Should reset the queue
		fifo_ctrl->set_get(restore_point);
		fifo_ret_addr = saved_fifo_ret;
		std::this_thread::sleep_for(2ms);
		fifo_ctrl->abort();

		if (std::exchange(in_begin_end, false) && !rsx::method_registers.current_draw_clause.empty())
		{
			execute_nop_draw();
			rsx::thread::end();
		}

		recovered_fifo_cmds_history.push({fifo_ctrl->last_cmd(), current_time});
	}

	std::vector<std::pair<u32, u32>> thread::dump_callstack_list() const
	{
		std::vector<std::pair<u32, u32>> result;

		if (u32 addr = fifo_ret_addr; addr != RSX_CALL_STACK_EMPTY)
		{
			result.emplace_back(addr, 0);
		}

		return result;
	}
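
	// Hybrid delay: sleep in multiples of the host scheduler quantum while far from the
	// deadline, then yield, then busy-wait the tail. E.g. on Windows with a 500us quantum
	// and 1300us remaining, this sleeps ~1000us, re-reads the clock and spins out the rest.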
	void thread::fifo_wake_delay(u64 div)
	{
		// TODO: Nanoseconds accuracy
		u64 remaining = g_cfg.video.driver_wakeup_delay;

		if (!remaining)
		{
			return;
		}

		// Some cases do not need full delay
		remaining = utils::aligned_div(remaining, div);
		const u64 until = rsx::uclock() + remaining;

		while (true)
		{
#ifdef __linux__
			// NOTE: Assumption that timer initialization has succeeded
			u64 host_min_quantum = remaining <= 1000 ? 10 : 50;
#else
			// Host scheduler quantum for windows (worst case)
			// NOTE: On ps3 this function has very high accuracy
			constexpr u64 host_min_quantum = 500;
#endif
			if (remaining >= host_min_quantum)
			{
#ifdef __linux__
				// Do not wait for the last quantum to avoid loss of accuracy
				thread_ctrl::wait_for(remaining - ((remaining % host_min_quantum) + host_min_quantum), false);
#else
				// Wait on multiple of min quantum for large durations to avoid overloading low thread cpus
				thread_ctrl::wait_for(remaining - (remaining % host_min_quantum), false);
#endif
			}
			// TODO: Determine best value for yield delay
			else if (remaining >= host_min_quantum / 2)
			{
				std::this_thread::yield();
			}
			else
			{
				busy_wait(100);
			}

			const u64 current = rsx::uclock();

			if (current >= until)
			{
				break;
			}

			remaining = until - current;
		}
	}

	u32 thread::get_fifo_cmd() const
	{
		// Last fifo cmd for logging and utility
		return fifo_ctrl->last_cmd();
	}

	void invalid_method(thread*, u32, u32);

	void thread::dump_regs(std::string& result) const
	{
		if (ctrl)
		{
			fmt::append(result, "FIFO: GET=0x%07x, PUT=0x%07x, REF=0x%08x\n", +ctrl->get, +ctrl->put, +ctrl->ref);
		}

		for (u32 i = 0; i < 1 << 14; i++)
		{
			if (rsx::methods[i] == &invalid_method)
			{
				continue;
			}

			switch (i)
			{
			case NV4097_NO_OPERATION:
			case NV4097_INVALIDATE_L2:
			case NV4097_INVALIDATE_VERTEX_FILE:
			case NV4097_INVALIDATE_VERTEX_CACHE_FILE:
			case NV4097_INVALIDATE_ZCULL:
			case NV4097_WAIT_FOR_IDLE:
			case NV4097_PM_TRIGGER:
			case NV4097_ZCULL_SYNC:
				continue;

			case NV308A_COLOR:
			{
				i = NV3089_SET_OBJECT;
				continue;
			}

			default:
			{
				break;
			}
			}

			fmt::append(result, "[%04x] %s\n", i, ensure(rsx::get_pretty_printing_function(i))(i, method_registers.registers[i]));
		}
	}

	flags32_t thread::read_barrier(u32 memory_address, u32 memory_range, bool unconditional)
	{
		flags32_t zcull_flags = (unconditional) ? reports::sync_none : reports::sync_defer_copy;
		return zcull_ctrl->read_barrier(this, memory_address, memory_range, zcull_flags);
	}

	void thread::notify_zcull_info_changed()
	{
		check_zcull_status(false);
	}

	void thread::on_notify_memory_mapped(u32 address, u32 size)
	{
		// In the case where an unmap is followed shortly after by a remap of the same address space
		// we must block until RSX has invalidated the memory
		// or lock m_mtx_task and do it ourselves
		if (m_rsx_thread_exiting)
			return;

		reader_lock lock(m_mtx_task);

		const auto map_range = address_range::start_length(address, size);

		if (!m_invalidated_memory_range.valid())
			return;

		if (m_invalidated_memory_range.overlaps(map_range))
		{
			lock.upgrade();
			handle_invalidated_memory_range();
		}
	}
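
	// LLE unmap: each unmapped 1MB io page sets one bit in a 512-bit mask, and each used
	// 64-bit group is delivered to the guest as event SYS_RSX_EVENT_UNMAPPED_BASE << group.
	// HLE instead patches the cellGcmSys offset table directly.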
	void thread::on_notify_memory_unmapped(u32 address, u32 size)
	{
		if (!m_rsx_thread_exiting && address < rsx::constants::local_mem_base)
		{
			if (!isHLE)
			{
				// Each bit represents io entry to be unmapped
				u64 unmap_status[512 / 64]{};

				for (u32 ea = address >> 20, end = ea + (size >> 20); ea < end; ea++)
				{
					const u32 io = utils::rol32(iomap_table.io[ea], 32 - 20);

					if (io + 1)
					{
						unmap_status[io / 64] |= 1ull << (io & 63);
						iomap_table.ea[io].release(-1);
						iomap_table.io[ea].release(-1);
					}
				}

				for (u32 i = 0; i < std::size(unmap_status); i++)
				{
					// TODO: Check order when sending multiple events
					if (u64 to_unmap = unmap_status[i])
					{
						// Each 64 entries are grouped by a bit
						const u64 io_event = SYS_RSX_EVENT_UNMAPPED_BASE << i;
						send_event(0, io_event, to_unmap);
					}
				}
			}
			else
			{
				// TODO: Fix this
				u32 ea = address >> 20, io = iomap_table.io[ea];

				if (io + 1)
				{
					io >>= 20;

					auto& cfg = g_fxo->get<gcm_config>();
					std::lock_guard lock(cfg.gcmio_mutex);

					for (const u32 end = ea + (size >> 20); ea < end;)
					{
						cfg.offsetTable.ioAddress[ea++] = 0xFFFF;
						cfg.offsetTable.eaAddress[io++] = 0xFFFF;
					}
				}
			}

			// Pause RSX thread momentarily to handle unmapping
			eng_lock elock(this);

			// Queue up memory invalidation
			std::lock_guard lock(m_mtx_task);

			const bool existing_range_valid = m_invalidated_memory_range.valid();
			const auto unmap_range = address_range::start_length(address, size);

			if (existing_range_valid && m_invalidated_memory_range.touches(unmap_range))
			{
				// Merge range-to-invalidate in case of consecutive unmaps
				m_invalidated_memory_range.set_min_max(unmap_range);
			}
			else
			{
				if (existing_range_valid)
				{
					// We can only delay consecutive unmaps.
					// Otherwise, to avoid VirtualProtect failures, we need to do the invalidation here
					handle_invalidated_memory_range();
				}

				m_invalidated_memory_range = unmap_range;
			}

			m_eng_interrupt_mask |= rsx::memory_config_interrupt;
		}
	}

	// NOTE: m_mtx_task lock must be acquired before calling this method
	void thread::handle_invalidated_memory_range()
	{
		m_eng_interrupt_mask.clear(rsx::memory_config_interrupt);

		if (!m_invalidated_memory_range.valid())
			return;

		if (is_stopped())
		{
			on_invalidate_memory_range(m_invalidated_memory_range, rsx::invalidation_cause::read);
			on_invalidate_memory_range(m_invalidated_memory_range, rsx::invalidation_cause::write);
		}

		on_invalidate_memory_range(m_invalidated_memory_range, rsx::invalidation_cause::unmap);
		m_invalidated_memory_range.invalidate();
	}
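
	// External pause handshake: callers increment external_interrupt_lock and spin until the
	// RSX thread (in wait_pause) raises external_interrupt_ack; the RSX thread then spins
	// until every pauser has released the lock again before dropping the ack.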
	// Pause/cont wrappers for FIFO ctrl. Never call this from rsx thread itself!
	void thread::pause()
	{
		external_interrupt_lock++;

		while (!external_interrupt_ack)
		{
			if (is_stopped())
				break;

			utils::pause();
		}
	}

	void thread::unpause()
	{
		// TODO: Clean this shit up
		external_interrupt_lock--;
	}

	void thread::wait_pause()
	{
		do
		{
			if (g_cfg.video.multithreaded_rsx)
			{
				g_fxo->get<rsx::dma_manager>().sync();
			}

			external_interrupt_ack.store(true);

			while (external_interrupt_lock)
			{
				// TODO: Investigate non busy-spinning method
				utils::pause();
			}

			external_interrupt_ack.store(false);
		}
		while (external_interrupt_lock);
	}
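
	// Load estimate: load% = (elapsed - idle) * 100 / elapsed over the sampling window.
	// For example, 33ms of wall time with 25ms spent idling on the frame limiter yields
	// (33 - 25) * 100 / 33 = 24% approximate load.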
	u32 thread::get_load()
	{
		// Average load over around 30 frames
		if (!performance_counters.last_update_timestamp || performance_counters.sampled_frames > 30)
		{
			const auto timestamp = rsx::uclock();
			const auto idle = performance_counters.idle_time.load();
			const auto elapsed = timestamp - performance_counters.last_update_timestamp;

			if (elapsed > idle)
				performance_counters.approximate_load = static_cast<u32>((elapsed - idle) * 100 / elapsed);
			else
				performance_counters.approximate_load = 0u;

			performance_counters.idle_time = 0;
			performance_counters.sampled_frames = 0;
			performance_counters.last_update_timestamp = timestamp;
		}

		return performance_counters.approximate_load;
	}
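
	// End-of-frame bookkeeping: flushes a user-requested frame capture to disk, drains
	// leaked ZCULL work, queues the flip with this frame's stats, and updates the FIFO
	// flattener and frame-skip counters (the skip counter counts up through
	// consecutive_frames_to_draw, then flips negative to skip that many frames).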
	void thread::on_frame_end(u32 buffer, bool forced)
	{
		// Marks the end of a frame scope GPU-side
		if (g_user_asked_for_frame_capture.exchange(false) && !capture_current_frame)
		{
			capture_current_frame = true;
			frame_debug.reset();
			frame_capture.reset();

			// random number just to jumpstart the size
			frame_capture.replay_commands.reserve(8000);

			// capture first tile state with nop cmd
			rsx::frame_capture_data::replay_command replay_cmd;
			replay_cmd.rsx_command = std::make_pair(NV4097_NO_OPERATION, 0);
			frame_capture.replay_commands.push_back(replay_cmd);
			capture::capture_display_tile_state(this, frame_capture.replay_commands.back());
		}
		else if (capture_current_frame)
		{
			capture_current_frame = false;

			const std::string file_path = fs::get_config_dir() + "captures/" + Emu.GetTitleID() + "_" + date_time::current_time_narrow() + "_capture.rrc";

			// todo: may want to compress this data?
			utils::serial save_manager;
			save_manager.reserve(0x800'0000); // 128MB
			save_manager(frame_capture);

			fs::pending_file temp(file_path);

			if (temp.file && (temp.file.write(save_manager.data), temp.commit(false)))
			{
				rsx_log.success("Capture successful: %s", file_path);
			}
			else
			{
				rsx_log.fatal("Capture failed: %s (%s)", file_path, fs::g_tls_error);
			}

			frame_capture.reset();
			Emu.Pause();
		}

		if (zcull_ctrl->has_pending())
		{
			// NOTE: This is a workaround for buggy games.
			// Some applications leave the zpass/stats gathering active but don't use the information.
			// This can lead to the zcull unit using up all the memory queueing up operations that never get consumed.
			// Seen in Diablo III and Yakuza 5
			zcull_ctrl->clear(this, CELL_GCM_ZPASS_PIXEL_CNT | CELL_GCM_ZCULL_STATS);
		}

		// Save current state
		m_queued_flip.stats = m_frame_stats;
		m_queued_flip.push(buffer);
		m_queued_flip.skip_frame = skip_current_frame;

		if (!forced) [[likely]]
		{
			if (!g_cfg.video.disable_FIFO_reordering)
			{
				// Try to enable FIFO optimizations
				// Only rarely useful for some games like RE4
				m_flattener.evaluate_performance(m_frame_stats.draw_calls);
			}

			if (g_cfg.video.frame_skip_enabled)
			{
				m_skip_frame_ctr++;

				if (m_skip_frame_ctr >= g_cfg.video.consecutive_frames_to_draw)
					m_skip_frame_ctr = -g_cfg.video.consecutive_frames_to_skip;

				skip_current_frame = (m_skip_frame_ctr < 0);
			}
		}
		else
		{
			if (!g_cfg.video.disable_FIFO_reordering)
			{
				// Flattener is unusable due to forced random flips
				m_flattener.force_disable();
			}

			if (g_cfg.video.frame_skip_enabled)
			{
				rsx_log.error("Frame skip is not compatible with this application");
			}
		}

		// Reset current stats
		m_frame_stats = {};
		m_profiler.enabled = !!g_cfg.video.overlay;
	}

	bool thread::request_emu_flip(u32 buffer)
	{
		if (is_current_thread()) // requested through command buffer
		{
			// NOTE: The flip will clear any queued flip requests
			handle_emu_flip(buffer);
		}
		else // requested 'manually' through ppu syscall
		{
			if (async_flip_requested & flip_request::emu_requested)
			{
				// ignore multiple requests until previous happens
				return true;
			}

			async_flip_buffer = buffer;
			async_flip_requested |= flip_request::emu_requested;
			m_eng_interrupt_mask |= rsx::display_interrupt;

			if (state & cpu_flag::exit)
			{
				// Resubmit possibly-ignored flip on savestate load
				return false;
			}
		}

		return true;
	}

	void thread::handle_emu_flip(u32 buffer)
	{
		if (m_queued_flip.in_progress)
		{
			// Recursion not allowed!
			return;
		}

		if (!m_queued_flip.pop(buffer))
		{
			// Frame was not queued before flipping
			on_frame_end(buffer, true);
			ensure(m_queued_flip.pop(buffer));
		}

		double limit = 0.;
		const auto frame_limit = g_disable_frame_limit ? frame_limit_type::none : g_cfg.video.frame_limit;

		switch (frame_limit)
		{
		case frame_limit_type::none: limit = g_cfg.core.max_cpu_preempt_count_per_frame ? static_cast<double>(g_cfg.video.vblank_rate) : 0.; break;
		case frame_limit_type::_50: limit = 50.; break;
		case frame_limit_type::_60: limit = 60.; break;
		case frame_limit_type::_30: limit = 30.; break;
		case frame_limit_type::_auto: limit = static_cast<double>(g_cfg.video.vblank_rate); break;
		case frame_limit_type::_ps3: limit = 0.; break;
		case frame_limit_type::infinite: limit = 0.; break;
		default:
			break;
		}

		if (double limit2 = g_cfg.video.second_frame_limit; limit2 >= 0.1 && (limit2 < limit || !limit))
		{
			// Apply a second limit
			limit = limit2;
		}
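
		// Frame pacing: each flip pushes the next target flip time needed_us = 1e6 / limit
		// microseconds ahead; e.g. at a 60fps limit the budget is 16666us. An early flip
		// sleeps until the target, and the sleep is charged to the idle-time counter.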
		if (limit)
		{
			const u64 needed_us = static_cast<u64>(1000000 / limit);
			const u64 time = std::max<u64>(get_system_time(), target_rsx_flip_time > needed_us ? target_rsx_flip_time - needed_us : 0);

			if (int_flip_index)
			{
				if (target_rsx_flip_time > time + 1000)
				{
					const auto delay_us = target_rsx_flip_time - time;
					lv2_obj::wait_timeout(delay_us, nullptr, false);
					performance_counters.idle_time += delay_us;
				}
			}

			target_rsx_flip_time = std::max(time, target_rsx_flip_time) + needed_us;
			flip_notification_count = 1;
		}
		else if (frame_limit == frame_limit_type::_ps3)
		{
			bool exit = false;

			if (vblank_at_flip == umax)
			{
				vblank_at_flip = +vblank_count;
				flip_notification_count = 1;
				exit = true;
			}

			if (requested_vsync && (exit || vblank_at_flip == vblank_count))
			{
				// Not yet signaled, handle it later
				async_flip_requested |= flip_request::emu_requested;
				async_flip_buffer = buffer;
				return;
			}

			vblank_at_flip = umax;
		}
		else
		{
			flip_notification_count = 1;
		}

		int_flip_index += flip_notification_count;

		current_display_buffer = buffer;
		m_queued_flip.emu_flip = true;
		m_queued_flip.in_progress = true;
		m_queued_flip.skip_frame |= g_cfg.video.disable_video_output && !g_cfg.video.perf_overlay.perf_overlay_enabled;

		flip(m_queued_flip);

		last_guest_flip_timestamp = rsx::uclock() - 1000000;
		flip_status = CELL_GCM_DISPLAY_FLIP_STATUS_DONE;
		m_queued_flip.in_progress = false;

		while (flip_notification_count--)
		{
			if (!isHLE)
			{
				sys_rsx_context_attribute(0x55555555, 0xFEC, buffer, 0, 0, 0);

				if (unsent_gcm_events)
				{
					// TODO: A proper fix
					return;
				}

				continue;
			}

			if (auto ptr = flip_handler)
			{
				intr_thread->cmd_list
				({
					{ ppu_cmd::set_args, 1 }, u64{1},
					{ ppu_cmd::lle_call, ptr },
					{ ppu_cmd::sleep, 0 }
				});

				intr_thread->cmd_notify++;
				intr_thread->cmd_notify.notify_one();
			}
		}
	}
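
	// CPU preemption control: with 60 frame samples buffered, the average frame time
	// classifies each frame; deviation beyond 1/7 of the average is a fail, beyond 1/3 a
	// hard fail. Stable output grows the per-frame preemption budget by 4, instability
	// shrinks it by 10 (or by a further 1/8 after repeated hard fails), and the budget is
	// finally translated into an lv2 yield frequency.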
	void thread::evaluate_cpu_usage_reduction_limits()
	{
		const u64 max_preempt_count = g_cfg.core.max_cpu_preempt_count_per_frame;

		if (!max_preempt_count)
		{
			frame_times.clear();
			lv2_obj::set_yield_frequency(0, 0);
			return;
		}

		const u64 current_time = get_system_time();
		const u64 current_tsc = utils::get_tsc();
		u64 preempt_count = 0;

		if (frame_times.size() >= 60)
		{
			u64 diffs = 0;

			for (usz i = 1; i < frame_times.size(); i++)
			{
				const u64 cur_diff = frame_times[i].timestamp - frame_times[i - 1].timestamp;
				diffs += cur_diff;
			}

			const usz avg_frame_time = diffs / 59;

			u32 lowered_delay = 0;
			u32 raised_delay = 0;
			bool can_reevaluate = true;
			u64 prev_preempt_count = umax;

			for (usz i = frame_times.size() - 30; i < frame_times.size(); i++)
			{
				if (prev_preempt_count == umax)
				{
					prev_preempt_count = frame_times[i].preempt_count;
					continue;
				}

				if (prev_preempt_count != frame_times[i].preempt_count)
				{
					if (prev_preempt_count > frame_times[i].preempt_count)
					{
						lowered_delay++;
					}
					else if (prev_preempt_count < frame_times[i].preempt_count)
					{
						raised_delay++;
					}

					if (i > frame_times.size() - 30)
					{
						// Slow preemption count increase
						can_reevaluate = false;
					}
				}

				prev_preempt_count = frame_times[i].preempt_count;
			}

			preempt_count = std::min<u64>(frame_times.back().preempt_count, max_preempt_count);

			u32 fails = 0;
			u32 hard_fails = 0;
			bool is_last_frame_a_fail = false;

			auto abs_dst = [](u64 a, u64 b)
			{
				return a >= b ? a - b : b - a;
			};

			for (u32 i = 1; i <= frame_times.size(); i++)
			{
				const u64 cur_diff = (i == frame_times.size() ? current_time : frame_times[i].timestamp) - frame_times[i - 1].timestamp;

				if (const u64 diff_of_diff = abs_dst(cur_diff, avg_frame_time);
					diff_of_diff >= avg_frame_time / 7)
				{
					if (diff_of_diff >= avg_frame_time / 3)
					{
						raised_delay++;
						hard_fails++;

						if (i == frame_times.size())
						{
							is_last_frame_a_fail = true;
						}
					}

					if (fails != umax)
					{
						fails++;
					}
				}
			}

			bool hard_measures_taken = false;
			const usz fps_10 = 10'000'000 / avg_frame_time;

			auto lower_preemption_count = [&]()
			{
				if (preempt_count >= 10)
				{
					preempt_count -= 10;
				}
				else
				{
					preempt_count = 0;
				}

				if ((hard_fails > 2 || fails > 20) && is_last_frame_a_fail)
				{
					hard_measures_taken = preempt_count > 1;
					preempt_count = preempt_count * 7 / 8;
					prevent_preempt_increase_tickets = 10;
				}
				else
				{
					prevent_preempt_increase_tickets = std::max<u32>(7, prevent_preempt_increase_tickets);
				}
			};

			const u64 vblank_rate_10 = g_cfg.video.vblank_rate * 10;

			if (can_reevaluate)
			{
				const bool is_avg_fps_ok = (abs_dst(fps_10, 300) < 3 || abs_dst(fps_10, 600) < 4 || abs_dst(fps_10, vblank_rate_10) < 4 || abs_dst(fps_10, vblank_rate_10 / 2) < 3);

				if (!hard_fails && fails < 6 && is_avg_fps_ok)
				{
					if (prevent_preempt_increase_tickets)
					{
						prevent_preempt_increase_tickets--;
					}
					else
					{
						preempt_count = std::min<u64>(preempt_count + 4, max_preempt_count);
					}
				}
				else
				{
					lower_preemption_count();
				}
			}
			// Sudden FPS drop detection
			else if ((fails > 13 || hard_fails > 2 || !(abs_dst(fps_10, 300) < 20 || abs_dst(fps_10, 600) < 30 || abs_dst(fps_10, g_cfg.video.vblank_rate * 10) < 30 || abs_dst(fps_10, g_cfg.video.vblank_rate * 10 / 2) < 20)) && lowered_delay < raised_delay && is_last_frame_a_fail)
			{
				lower_preemption_count();
			}

			perf_log.trace("CPU preemption control: reeval=%d, preempt_count=%llu, fails=%u, hard=%u, avg_frame_time=%llu, raised=%u, lowered=%u, taken=%u", can_reevaluate, preempt_count, fails, hard_fails, avg_frame_time, raised_delay, lowered_delay, ::g_lv2_preempts_taken.load());

			if (hard_measures_taken)
			{
				preempt_fail_old_preempt_count = std::max<u64>(preempt_fail_old_preempt_count, std::min<u64>(frame_times.back().preempt_count, max_preempt_count));
			}
			else if (preempt_fail_old_preempt_count)
			{
				perf_log.error("Lowering the current preemption count significantly due to a performance drop. If this happens frequently, consider lowering the maximum preemption count to 'new-count' or lower. (old-count=%llu, new-count=%llu)", preempt_fail_old_preempt_count, preempt_count);
				preempt_fail_old_preempt_count = 0;
			}
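
			// Translate the budget into a TSC yield period: tsc_diff / time_diff is ticks
			// per microsecond, so preempt_diff = ticks_per_us * (1'000'000 / 30) / preempt_count
			// spaces the preemptions evenly across a 30fps frame window. The second argument
			// is a cutoff that leaves the last few percent of the frame yield-free to reduce
			// stutter risk.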
			const u64 tsc_diff = (current_tsc - frame_times.back().tsc);
			const u64 time_diff = (current_time - frame_times.back().timestamp);
			const u64 preempt_diff = tsc_diff * (1'000'000 / 30) / (time_diff * std::max<u64>(preempt_count, 1ull));

			if (!preempt_count)
			{
				lv2_obj::set_yield_frequency(0, 0);
			}
			else if (abs_dst(fps_10, 300) < 30)
			{
				// Set an upper limit so a backoff technique would be taken if there is a sudden performance drop
				// Allow 4% of no yield to reduce significantly the risk of stutter
				lv2_obj::set_yield_frequency(preempt_diff, current_tsc + (tsc_diff * (1'000'000 * 96 / (30 * 100)) / time_diff));
			}
			else if (abs_dst(fps_10, 600) < 40)
			{
				// 5% for 60fps
				lv2_obj::set_yield_frequency(preempt_diff, current_tsc + (tsc_diff * (1'000'000 * 94 / (60 * 100)) / time_diff));
			}
			else if (abs_dst(fps_10, vblank_rate_10) < 40)
			{
				lv2_obj::set_yield_frequency(preempt_diff, current_tsc + (tsc_diff * (1'000'000 * 94 / (vblank_rate_10 * 10)) / time_diff));
			}
			else if (abs_dst(fps_10, vblank_rate_10 / 2) < 30)
			{
				lv2_obj::set_yield_frequency(preempt_diff, current_tsc + (tsc_diff * (1'000'000 * 96 / ((vblank_rate_10 / 2) * 10)) / time_diff));
			}
			else
			{
				// Undetected case, last 12% is with no yield
				lv2_obj::set_yield_frequency(preempt_diff, current_tsc + (tsc_diff * 88 / 100));
			}

			frame_times.pop_front();
		}
		else
		{
			lv2_obj::set_yield_frequency(0, 0);
		}

		frame_times.push_back(frame_time_t{preempt_count, current_time, current_tsc});
	}
}