diff --git a/src/xenia/gpu/gpu_flags.cc b/src/xenia/gpu/gpu_flags.cc index d70ddf322..311715fad 100644 --- a/src/xenia/gpu/gpu_flags.cc +++ b/src/xenia/gpu/gpu_flags.cc @@ -55,10 +55,14 @@ DEFINE_bool( "when MSAA is used with fullscreen passes.", "GPU"); -DEFINE_int32(query_occlusion_fake_sample_count, 100, +DEFINE_int32(query_occlusion_sample_lower_threshold, 80, "If set to -1 no sample counts are written, games may hang. Else, " "the sample count of every tile will be incremented on every " "EVENT_WRITE_ZPD by this number. Setting this to 0 means " "everything is reported as occluded.", "GPU"); -UPDATE_from_int32(query_occlusion_fake_sample_count, 2024, 9, 23, 9, 1000); +DEFINE_int32( + query_occlusion_sample_upper_threshold, 100, + "Set to higher number than query_occlusion_sample_lower_threshold. This " + "value is ignored if query_occlusion_sample_lower_threshold is set to -1.", + "GPU"); diff --git a/src/xenia/gpu/gpu_flags.h b/src/xenia/gpu/gpu_flags.h index b7a82b1f8..3e76026eb 100644 --- a/src/xenia/gpu/gpu_flags.h +++ b/src/xenia/gpu/gpu_flags.h @@ -26,7 +26,9 @@ DECLARE_bool(non_seamless_cube_map); DECLARE_bool(half_pixel_offset); -DECLARE_int32(query_occlusion_fake_sample_count); +DECLARE_int32(query_occlusion_sample_lower_threshold); + +DECLARE_int32(query_occlusion_sample_upper_threshold); DECLARE_bool(disassemble_pm4); diff --git a/src/xenia/gpu/pm4_command_processor_implement.h b/src/xenia/gpu/pm4_command_processor_implement.h index 5b04df510..614bf0f89 100644 --- a/src/xenia/gpu/pm4_command_processor_implement.h +++ b/src/xenia/gpu/pm4_command_processor_implement.h @@ -953,6 +953,9 @@ bool COMMAND_PROCESSOR::ExecutePacketType3_EVENT_WRITE_EXT( trace_writer_.WriteMemoryWrite(CpuToGpu(address), sizeof(extents)); return true; } + +static uint32_t samples = cvars::query_occlusion_sample_upper_threshold; + XE_NOINLINE bool COMMAND_PROCESSOR::ExecutePacketType3_EVENT_WRITE_ZPD( uint32_t packet, uint32_t count) XE_RESTRICT { @@ -963,28 +966,33 @@ bool 
COMMAND_PROCESSOR::ExecutePacketType3_EVENT_WRITE_ZPD( // Writeback initiator. COMMAND_PROCESSOR::WriteEventInitiator(initiator & 0x3F); + if (cvars::query_occlusion_sample_lower_threshold < 0) { + return true; + } // Occlusion queries: // This command is send on query begin and end. // As a workaround report some fixed amount of passed samples. - auto fake_sample_count = cvars::query_occlusion_fake_sample_count; - if (fake_sample_count >= 0) { - auto* pSampleCounts = - memory_->TranslatePhysical<xe_gpu_depth_sample_counts*>( - register_file_->values[XE_GPU_REG_RB_SAMPLE_COUNT_ADDR]); - // 0xFFFFFEED is written to this two locations by D3D only on D3DISSUE_END - // and used to detect a finished query. - bool is_end_via_z_pass = pSampleCounts->ZPass_A == kQueryFinished && - pSampleCounts->ZPass_B == kQueryFinished; - // Older versions of D3D also checks for ZFail (4D5307D5). - bool is_end_via_z_fail = pSampleCounts->ZFail_A == kQueryFinished && - pSampleCounts->ZFail_B == kQueryFinished; - std::memset(pSampleCounts, 0, sizeof(xe_gpu_depth_sample_counts)); - if (is_end_via_z_pass || is_end_via_z_fail) { - pSampleCounts->ZPass_A = fake_sample_count; - pSampleCounts->Total_A = fake_sample_count; - } + auto* pSampleCounts = memory_->TranslatePhysical<xe_gpu_depth_sample_counts*>( + register_file_->values[XE_GPU_REG_RB_SAMPLE_COUNT_ADDR]); + // 0xFFFFFEED is written to this two locations by D3D only on D3DISSUE_END + // and used to detect a finished query. + bool is_end_via_z_pass = pSampleCounts->ZPass_A == kQueryFinished && + pSampleCounts->ZPass_B == kQueryFinished; + // Older versions of D3D also checks for ZFail (4D5307D5). + bool is_end_via_z_fail = pSampleCounts->ZFail_A == kQueryFinished && + pSampleCounts->ZFail_B == kQueryFinished; + std::memset(pSampleCounts, 0, sizeof(xe_gpu_depth_sample_counts)); + if (is_end_via_z_pass || is_end_via_z_fail) { + pSampleCounts->ZPass_A = samples; + pSampleCounts->Total_A = samples; } + samples = + samples <= static_cast<uint32_t>( + cvars::query_occlusion_sample_lower_threshold) + ? 
static_cast<uint32_t>(cvars::query_occlusion_sample_upper_threshold) + : samples - 1; + return true; }