diff --git a/src/xenia/gpu/command_processor.cc b/src/xenia/gpu/command_processor.cc index 7598ff748..10f11e95d 100644 --- a/src/xenia/gpu/command_processor.cc +++ b/src/xenia/gpu/command_processor.cc @@ -1123,12 +1123,12 @@ bool CommandProcessor::ExecutePacketType3_EVENT_WRITE_EXT(RingBuffer* reader, // drawcall. // https://www.google.com/patents/US20060055701 uint16_t extents[] = { - 0 >> 3, // min x - 8192 >> 3, // max x - 0 >> 3, // min y - 8192 >> 3, // max y - 0, // min z - 1, // max z + 0 >> 3, // min x + xenos::kTexture2DCubeMaxWidthHeight >> 3, // max x + 0 >> 3, // min y + xenos::kTexture2DCubeMaxWidthHeight >> 3, // max y + 0, // min z + 1, // max z }; assert_true(endianness == xenos::Endian::k8in16); xe::copy_and_swap_16_unaligned(memory_->TranslatePhysical(address), extents, diff --git a/src/xenia/gpu/d3d12/d3d12_command_processor.cc b/src/xenia/gpu/d3d12/d3d12_command_processor.cc index 7e0be3cd4..878596018 100644 --- a/src/xenia/gpu/d3d12/d3d12_command_processor.cc +++ b/src/xenia/gpu/d3d12/d3d12_command_processor.cc @@ -3065,11 +3065,11 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( // Conversion to Direct3D 12 normalized device coordinates. // See viewport configuration in UpdateFixedFunctionState for explanations. // X and Y scale/offset is to convert unnormalized coordinates generated by - // shaders (for rectangle list drawing, for instance) to the 8192x8192 - // viewport (the maximum render target size) that is used to emulate - // unnormalized coordinates. Z scale/offset is to convert from OpenGL NDC to - // Direct3D NDC if needed. Also apply half-pixel offset to reproduce Direct3D - // 9 rasterization rules - must be done before clipping, not through the + // shaders (for rectangle list drawing, for instance) to the viewport of the + // largest possible render target size that is used to emulate unnormalized + // coordinates. Z scale/offset is to convert from OpenGL NDC to Direct3D NDC + // if needed. Also apply half-pixel offset to reproduce Direct3D 9 + // rasterization rules - must be done before clipping, not through the // viewport, for SSAA and resolution scale to work correctly. float viewport_scale_x = regs[XE_GPU_REG_PA_CL_VPORT_XSCALE].f32; float viewport_scale_y = regs[XE_GPU_REG_PA_CL_VPORT_YSCALE].f32; @@ -3116,14 +3116,14 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( ndc_offset_x += 0.5f / viewport_scale_x; } } else { - ndc_offset_x += 1.0f / 8192.0f; + ndc_offset_x += 1.0f / xenos::kTexture2DCubeMaxWidthHeight; } if (pa_cl_vte_cntl.vport_y_scale_ena) { if (viewport_scale_y != 0.0f) { ndc_offset_y += 0.5f / viewport_scale_y; } } else { - ndc_offset_y -= 1.0f / 8192.0f; + ndc_offset_y -= 1.0f / xenos::kTexture2DCubeMaxWidthHeight; } } dirty |= system_constants_.ndc_scale[0] != ndc_scale_x; @@ -3158,13 +3158,13 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( point_screen_to_ndc_x = (viewport_scale_x != 0.0f) ? (0.5f / viewport_scale_x) : 0.0f; } else { - point_screen_to_ndc_x = 1.0f / 8192.0f; + point_screen_to_ndc_x = 1.0f / xenos::kTexture2DCubeMaxWidthHeight; } if (pa_cl_vte_cntl.vport_y_scale_ena) { point_screen_to_ndc_y = (viewport_scale_y != 0.0f) ? (-0.5f / viewport_scale_y) : 0.0f; } else { - point_screen_to_ndc_y = -1.0f / 8192.0f; + point_screen_to_ndc_y = -1.0f / xenos::kTexture2DCubeMaxWidthHeight; } dirty |= system_constants_.point_screen_to_ndc[0] != point_screen_to_ndc_x; dirty |= system_constants_.point_screen_to_ndc[1] != point_screen_to_ndc_y; diff --git a/src/xenia/gpu/d3d12/render_target_cache.cc b/src/xenia/gpu/d3d12/render_target_cache.cc index 76b3e1feb..f614cea34 100644 --- a/src/xenia/gpu/d3d12/render_target_cache.cc +++ b/src/xenia/gpu/d3d12/render_target_cache.cc @@ -1487,7 +1487,7 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory, // of the buffer. root_constants.tile_sample_dest_base -= dest_address & ~0xFFFu; } - assert_true(dest_pitch <= 8192); + assert_true(dest_pitch <= xenos::kTexture2DCubeMaxWidthHeight); root_constants.tile_sample_dest_info = ((dest_pitch + 31) >> 5) | (rb_copy_dest_info.copy_dest_array ? (((dest_height + 31) >> 5) << 9) diff --git a/src/xenia/gpu/d3d12/render_target_cache.h b/src/xenia/gpu/d3d12/render_target_cache.h index c92e238c2..07bbabb86 100644 --- a/src/xenia/gpu/d3d12/render_target_cache.h +++ b/src/xenia/gpu/d3d12/render_target_cache.h @@ -219,10 +219,10 @@ class D3D12CommandProcessor; // other, and because the height is unknown (and the viewport and scissor are // not always present - D3DPT_RECTLIST is used very commonly, especially for // clearing (Direct3D 9 Clear is implemented this way on the Xbox 360) and -// copying, and it's usually drawn without a viewport and with 8192x8192 -// scissor), there may be cases of simultaneously bound render targets -// overlapping each other in the EDRAM in a way that is difficult to resolve, -// and stores/loads may destroy data. +// copying, and it's usually drawn without a viewport and with the scissor of +// the maximum possible size), there may be cases of simultaneously bound +// render targets overlapping each other in the EDRAM in a way that is +// difficult to resolve, and stores/loads may destroy data. // // ============================================================================= // 2x width and height scaling implementation: diff --git a/src/xenia/gpu/d3d12/texture_cache.cc b/src/xenia/gpu/d3d12/texture_cache.cc index a8229a6c6..6e6fd5c87 100644 --- a/src/xenia/gpu/d3d12/texture_cache.cc +++ b/src/xenia/gpu/d3d12/texture_cache.cc @@ -2189,7 +2189,9 @@ void TextureCache::BindingInfoFromFetchConstant( // No texture data at all. return; } - if (fetch.dimension == xenos::DataDimension::k1D && width > 8192) { + // TODO(Triang3l): Support long 1D textures. + if (fetch.dimension == xenos::DataDimension::k1D && + width > xenos::kTexture2DCubeMaxWidthHeight) { XELOGE( "1D texture is too wide ({}) - ignoring! " "Report the game to Xenia developers", diff --git a/src/xenia/gpu/dxbc_shader_translator.cc b/src/xenia/gpu/dxbc_shader_translator.cc index 0fb118650..370cb3b3a 100644 --- a/src/xenia/gpu/dxbc_shader_translator.cc +++ b/src/xenia/gpu/dxbc_shader_translator.cc @@ -450,7 +450,7 @@ void DxbcShaderTranslator::StartVertexShader_LoadVertexIndex() { void DxbcShaderTranslator::StartVertexOrDomainShader() { // Zero the interpolators. - for (uint32_t i = 0; i < kInterpolatorCount; ++i) { + for (uint32_t i = 0; i < xenos::kMaxInterpolators; ++i) { DxbcOpMov(DxbcDest::O(uint32_t(InOutRegister::kVSDSOutInterpolators) + i), DxbcSrc::LF(0.0f)); } @@ -647,7 +647,8 @@ void DxbcShaderTranslator::StartPixelShader() { DxbcOpMov(DxbcDest::ODepth(), DxbcSrc::LF(0.0f)); } - uint32_t interpolator_count = std::min(kInterpolatorCount, register_count()); + uint32_t interpolator_count = + std::min(xenos::kMaxInterpolators, register_count()); if (interpolator_count != 0) { // Copy interpolants to GPRs. if (edram_rov_used_) { @@ -960,7 +961,7 @@ void DxbcShaderTranslator::StartTranslation() { // Zero general-purpose registers to prevent crashes when the game // references them after only initializing them conditionally. - for (uint32_t i = IsDxbcPixelShader() ? kInterpolatorCount : 0; + for (uint32_t i = IsDxbcPixelShader() ? xenos::kMaxInterpolators : 0; i < register_count(); ++i) { DxbcOpMov(uses_register_dynamic_addressing() ? DxbcDest::X(0, i) : DxbcDest::R(i), @@ -2813,13 +2814,13 @@ void DxbcShaderTranslator::WriteInputSignature() { // Intepolators (TEXCOORD#). size_t interpolator_position = shader_object_.size(); shader_object_.resize(shader_object_.size() + - kInterpolatorCount * kParameterDwords); - parameter_count += kInterpolatorCount; + xenos::kMaxInterpolators * kParameterDwords); + parameter_count += xenos::kMaxInterpolators; { DxbcSignatureParameter* interpolators = reinterpret_cast(shader_object_.data() + interpolator_position); - for (uint32_t i = 0; i < kInterpolatorCount; ++i) { + for (uint32_t i = 0; i < xenos::kMaxInterpolators; ++i) { DxbcSignatureParameter& interpolator = interpolators[i]; interpolator.semantic_index = i; interpolator.component_type = @@ -2909,7 +2910,7 @@ void DxbcShaderTranslator::WriteInputSignature() { DxbcSignatureParameter* interpolators = reinterpret_cast(shader_object_.data() + interpolator_position); - for (uint32_t i = 0; i < kInterpolatorCount; ++i) { + for (uint32_t i = 0; i < xenos::kMaxInterpolators; ++i) { interpolators[i].semantic_name = semantic_offset; } DxbcSignatureParameter& point_parameters = @@ -3077,13 +3078,13 @@ void DxbcShaderTranslator::WriteOutputSignature() { // Intepolators (TEXCOORD#). size_t interpolator_position = shader_object_.size(); shader_object_.resize(shader_object_.size() + - kInterpolatorCount * kParameterDwords); - parameter_count += kInterpolatorCount; + xenos::kMaxInterpolators * kParameterDwords); + parameter_count += xenos::kMaxInterpolators; { DxbcSignatureParameter* interpolators = reinterpret_cast(shader_object_.data() + interpolator_position); - for (uint32_t i = 0; i < kInterpolatorCount; ++i) { + for (uint32_t i = 0; i < xenos::kMaxInterpolators; ++i) { DxbcSignatureParameter& interpolator = interpolators[i]; interpolator.semantic_index = i; interpolator.component_type = @@ -3197,7 +3198,7 @@ void DxbcShaderTranslator::WriteOutputSignature() { DxbcSignatureParameter* interpolators = reinterpret_cast(shader_object_.data() + interpolator_position); - for (uint32_t i = 0; i < kInterpolatorCount; ++i) { + for (uint32_t i = 0; i < xenos::kMaxInterpolators; ++i) { interpolators[i].semantic_name = semantic_offset; } DxbcSignatureParameter& point_parameters = @@ -3665,7 +3666,7 @@ void DxbcShaderTranslator::WriteShaderCode() { } } // Interpolator output. - for (uint32_t i = 0; i < kInterpolatorCount; ++i) { + for (uint32_t i = 0; i < xenos::kMaxInterpolators; ++i) { shader_object_.push_back( ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_OUTPUT) | ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3)); @@ -3727,7 +3728,7 @@ void DxbcShaderTranslator::WriteShaderCode() { // Interpolator input. if (!is_depth_only_pixel_shader_) { uint32_t interpolator_count = - std::min(kInterpolatorCount, register_count()); + std::min(xenos::kMaxInterpolators, register_count()); for (uint32_t i = 0; i < interpolator_count; ++i) { shader_object_.push_back( ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_INPUT_PS) | diff --git a/src/xenia/gpu/dxbc_shader_translator.h b/src/xenia/gpu/dxbc_shader_translator.h index a62608625..e1446e4a7 100644 --- a/src/xenia/gpu/dxbc_shader_translator.h +++ b/src/xenia/gpu/dxbc_shader_translator.h @@ -2049,8 +2049,7 @@ class DxbcShaderTranslator : public ShaderTranslator { static_assert(kSysConst_Count <= 64, "Too many system constants, can't use uint64_t for usage bits"); - static constexpr uint32_t kInterpolatorCount = 16; - static constexpr uint32_t kPointParametersTexCoord = kInterpolatorCount; + static constexpr uint32_t kPointParametersTexCoord = xenos::kMaxInterpolators; static constexpr uint32_t kClipSpaceZWTexCoord = kPointParametersTexCoord + 1; enum class InOutRegister : uint32_t { @@ -2061,7 +2060,7 @@ class DxbcShaderTranslator : public ShaderTranslator { kDSInControlPointIndex = 0, kVSDSOutInterpolators = 0, - kVSDSOutPointParameters = kVSDSOutInterpolators + kInterpolatorCount, + kVSDSOutPointParameters = kVSDSOutInterpolators + xenos::kMaxInterpolators, kVSDSOutClipSpaceZW, kVSDSOutPosition, // Clip and cull distances must be tightly packed in Direct3D! @@ -2073,7 +2072,7 @@ class DxbcShaderTranslator : public ShaderTranslator { // kill. kPSInInterpolators = 0, - kPSInPointParameters = kPSInInterpolators + kInterpolatorCount, + kPSInPointParameters = kPSInInterpolators + xenos::kMaxInterpolators, kPSInClipSpaceZW, kPSInPosition, kPSInFrontFace, diff --git a/src/xenia/gpu/xenos.h b/src/xenia/gpu/xenos.h index 01f98df21..07de1f3fc 100644 --- a/src/xenia/gpu/xenos.h +++ b/src/xenia/gpu/xenos.h @@ -549,6 +549,8 @@ enum class VertexShaderExportMode : uint32_t { kMultipass = 7, }; +constexpr uint32_t kMaxInterpolators = 16; + enum class SampleControl : uint32_t { kCentroidsOnly = 0, kCentersOnly = 1, @@ -570,10 +572,10 @@ inline uint32_t GetInterpolatorSamplingPattern( uint32_t interpolator_control_sampling_pattern) { if (msaa_samples == MsaaSamples::k1X || sample_control == SampleControl::kCentersOnly) { - return ((1 << 16) - 1) * uint32_t(SampleLocation::kCenter); + return ((1 << kMaxInterpolators) - 1) * uint32_t(SampleLocation::kCenter); } if (sample_control == SampleControl::kCentroidsOnly) { - return ((1 << 16) - 1) * uint32_t(SampleLocation::kCentroid); + return ((1 << kMaxInterpolators) - 1) * uint32_t(SampleLocation::kCentroid); } assert_true(sample_control == SampleControl::kCentroidsAndCenters); return interpolator_control_sampling_pattern; @@ -722,6 +724,21 @@ XEPACKEDUNION(xe_gpu_vertex_fetch_t, { }); }); +// Texture fetch constant size field widths. +constexpr uint32_t kTexture1DMaxWidthLog2 = 24; +constexpr uint32_t kTexture1DMaxWidth = uint32_t(1) << kTexture1DMaxWidthLog2; +constexpr uint32_t kTexture2DCubeMaxWidthHeightLog2 = 13; +constexpr uint32_t kTexture2DCubeMaxWidthHeight = + uint32_t(1) << kTexture2DCubeMaxWidthHeightLog2; +constexpr uint32_t kTexture2DMaxStackDepthLog2 = 6; +constexpr uint32_t kTexture2DMaxStackDepth = uint32_t(1) + << kTexture2DMaxStackDepthLog2; +constexpr uint32_t kTexture3DMaxWidthHeightLog2 = 11; +constexpr uint32_t kTexture3DMaxWidthHeight = uint32_t(1) + << kTexture3DMaxWidthHeightLog2; +constexpr uint32_t kTexture3DMaxDepthLog2 = 10; +constexpr uint32_t kTexture3DMaxDepth = uint32_t(1) << kTexture3DMaxDepthLog2; + // XE_GPU_REG_SHADER_CONSTANT_FETCH_* XEPACKEDUNION(xe_gpu_texture_fetch_t, { XEPACKEDSTRUCTANONYMOUS({ @@ -752,6 +769,7 @@ XEPACKEDUNION(xe_gpu_texture_fetch_t, { uint32_t nearest_clamp_policy : 1; // +11 d3d/opengl uint32_t base_address : 20; // +12 base address >> 12 + // Size is stored with 1 subtracted from each component. union { // dword_2 struct { uint32_t width : 24;