diff --git a/src/xenia/gpu/spirv_shader_translator.cc b/src/xenia/gpu/spirv_shader_translator.cc index a6c1a7f07..68db2b4d4 100644 --- a/src/xenia/gpu/spirv_shader_translator.cc +++ b/src/xenia/gpu/spirv_shader_translator.cc @@ -1931,8 +1931,7 @@ void SpirvShaderTranslator::StartFragmentShaderBeforeMain() { } void SpirvShaderTranslator::StartFragmentShaderInMain() { - // TODO(Triang3l): Allow memory export with resolution scaling only for the - // center host pixel, with sample shading (for depth format conversion) only + // TODO(Triang3l): With sample shading (for depth format conversion) only // for the bottom-right sample (unlike in Direct3D, the sample mask input // doesn't include covered samples of the primitive that correspond to other // invocations, so use the sample that's the most friendly to the half-pixel @@ -2088,7 +2087,6 @@ void SpirvShaderTranslator::StartFragmentShaderInMain() { // see the actual hardware instructions in both OpBitwiseXor and OpFNegate // cases. spv::Id const_sign_bit = builder_->makeUintConstant(UINT32_C(1) << 31); - // TODO(Triang3l): Resolution scale inversion. // X - pixel X .0 in the magnitude, is back-facing in the sign bit. assert_true(input_fragment_coordinates_ != spv::NoResult); id_vector_temp_.clear(); @@ -2102,6 +2100,12 @@ void SpirvShaderTranslator::StartFragmentShaderInMain() { input_fragment_coordinates_, id_vector_temp_), spv::NoPrecision))); + // Apply resolution scale inversion after truncating. + if (draw_resolution_scale_x_ > 1) { + param_gen_x = builder_->createBinOp( + spv::OpFMul, type_float_, param_gen_x, + builder_->makeFloatConstant(1.0f / float(draw_resolution_scale_x_))); + } if (!modification.pixel.param_gen_point) { assert_true(input_front_facing_ != spv::NoResult); param_gen_x = builder_->createTriOp( @@ -2137,6 +2141,12 @@ void SpirvShaderTranslator::StartFragmentShaderInMain() { input_fragment_coordinates_, id_vector_temp_), spv::NoPrecision))); + // Apply resolution scale inversion after truncating. + if (draw_resolution_scale_y_ > 1) { + param_gen_y = builder_->createBinOp( + spv::OpFMul, type_float_, param_gen_y, + builder_->makeFloatConstant(1.0f / float(draw_resolution_scale_y_))); + } if (modification.pixel.param_gen_point) { param_gen_y = builder_->createUnaryOp( spv::OpBitcast, type_float_, diff --git a/src/xenia/gpu/spirv_shader_translator.h b/src/xenia/gpu/spirv_shader_translator.h index a201686e0..d4890d7de 100644 --- a/src/xenia/gpu/spirv_shader_translator.h +++ b/src/xenia/gpu/spirv_shader_translator.h @@ -349,11 +349,15 @@ class SpirvShaderTranslator : public ShaderTranslator { SpirvShaderTranslator(const Features& features, bool native_2x_msaa_with_attachments, bool native_2x_msaa_no_attachments, - bool edram_fragment_shader_interlock) + bool edram_fragment_shader_interlock, + uint32_t draw_resolution_scale_x = 1, + uint32_t draw_resolution_scale_y = 1) : features_(features), native_2x_msaa_with_attachments_(native_2x_msaa_with_attachments), native_2x_msaa_no_attachments_(native_2x_msaa_no_attachments), - edram_fragment_shader_interlock_(edram_fragment_shader_interlock) {} + edram_fragment_shader_interlock_(edram_fragment_shader_interlock), + draw_resolution_scale_x_(draw_resolution_scale_x), + draw_resolution_scale_y_(draw_resolution_scale_y) {} uint64_t GetDefaultVertexShaderModification( uint32_t dynamic_addressable_register_count, @@ -711,6 +715,8 @@ class SpirvShaderTranslator : public ShaderTranslator { Features features_; bool native_2x_msaa_with_attachments_; bool native_2x_msaa_no_attachments_; + uint32_t draw_resolution_scale_x_; + uint32_t draw_resolution_scale_y_; // For safety with different drivers (even though fragment shader interlock in // SPIR-V only has one control flow requirement - that both begin and end must diff --git a/src/xenia/gpu/spirv_shader_translator_fetch.cc b/src/xenia/gpu/spirv_shader_translator_fetch.cc index 8f5a74690..009748954 100644 --- a/src/xenia/gpu/spirv_shader_translator_fetch.cc +++ b/src/xenia/gpu/spirv_shader_translator_fetch.cc @@ -1069,7 +1069,9 @@ void SpirvShaderTranslator::ProcessTextureFetchInstruction( coordinates[coordinate_component_index] = coordinates_operand; } - // TODO(Triang3l): Reverting the resolution scale. + // Resolution scale doesn't need reverting for texture weights - weights are + // calculated from fractional parts of coordinates which are + // scale-independent. if (instr.opcode == ucode::FetchOpcode::kGetTextureWeights) { // FIXME(Triang3l): Filtering modes should possibly be taken into account, diff --git a/src/xenia/gpu/spirv_shader_translator_memexport.cc b/src/xenia/gpu/spirv_shader_translator_memexport.cc index 94c0adf54..7c4ba746c 100644 --- a/src/xenia/gpu/spirv_shader_translator_memexport.cc +++ b/src/xenia/gpu/spirv_shader_translator_memexport.cc @@ -37,10 +37,70 @@ void SpirvShaderTranslator::ExportToMemory(uint8_t export_eM) { // Check if memory export is allowed in this guest shader invocation. std::optional if_memexport_allowed; - if (main_memexport_allowed_ != spv::NoResult) { - if_memexport_allowed.emplace(main_memexport_allowed_, - spv::SelectionControlDontFlattenMask, - *builder_); + spv::Id memexport_allowed = main_memexport_allowed_; + + // For pixel shaders with resolution scaling, only allow memory export from + // the center host pixel to avoid duplicate exports. + if (is_pixel_shader() && + (draw_resolution_scale_x_ > 1 || draw_resolution_scale_y_ > 1)) { + assert_true(input_fragment_coordinates_ != spv::NoResult); + + // Check if we're at the center pixel (scale/2 for both X and Y). + spv::Id is_center_pixel = builder_->makeBoolConstant(true); + + // Check X coordinate. + if (draw_resolution_scale_x_ > 1) { + id_vector_temp_.clear(); + id_vector_temp_.push_back(const_int_0_); + spv::Id pixel_x = builder_->createUnaryOp( + spv::OpConvertFToU, type_uint_, + builder_->createLoad( + builder_->createAccessChain(spv::StorageClassInput, + input_fragment_coordinates_, + id_vector_temp_), + spv::NoPrecision)); + spv::Id pixel_x_remainder = builder_->createBinOp( + spv::OpUMod, type_uint_, pixel_x, + builder_->makeUintConstant(draw_resolution_scale_x_)); + is_center_pixel = builder_->createBinOp( + spv::OpLogicalAnd, type_bool_, is_center_pixel, + builder_->createBinOp( + spv::OpIEqual, type_bool_, pixel_x_remainder, + builder_->makeUintConstant(draw_resolution_scale_x_ >> 1))); + } + + // Check Y coordinate. + if (draw_resolution_scale_y_ > 1) { + id_vector_temp_.clear(); + id_vector_temp_.push_back(builder_->makeIntConstant(1)); + spv::Id pixel_y = builder_->createUnaryOp( + spv::OpConvertFToU, type_uint_, + builder_->createLoad( + builder_->createAccessChain(spv::StorageClassInput, + input_fragment_coordinates_, + id_vector_temp_), + spv::NoPrecision)); + spv::Id pixel_y_remainder = builder_->createBinOp( + spv::OpUMod, type_uint_, pixel_y, + builder_->makeUintConstant(draw_resolution_scale_y_)); + is_center_pixel = builder_->createBinOp( + spv::OpLogicalAnd, type_bool_, is_center_pixel, + builder_->createBinOp( + spv::OpIEqual, type_bool_, pixel_y_remainder, + builder_->makeUintConstant(draw_resolution_scale_y_ >> 1))); + } + + // Combine with existing memexport_allowed condition. + memexport_allowed = + memexport_allowed != spv::NoResult + ? builder_->createBinOp(spv::OpLogicalAnd, type_bool_, + memexport_allowed, is_center_pixel) + : is_center_pixel; + } + + if (memexport_allowed != spv::NoResult) { + if_memexport_allowed.emplace( + memexport_allowed, spv::SelectionControlDontFlattenMask, *builder_); } // If the pixel was killed (but the actual killing on the SPIR-V side has not diff --git a/src/xenia/gpu/spirv_shader_translator_rb.cc b/src/xenia/gpu/spirv_shader_translator_rb.cc index e19fdd540..f1e7d5291 100644 --- a/src/xenia/gpu/spirv_shader_translator_rb.cc +++ b/src/xenia/gpu/spirv_shader_translator_rb.cc @@ -765,9 +765,10 @@ void SpirvShaderTranslator::CompleteFragmentShaderInMain() { fsi_color_targets_written = builder_->createLoad(var_main_fsi_color_written_, spv::NoPrecision); fsi_const_int_1 = builder_->makeIntConstant(1); - // TODO(Triang3l): Resolution scaling. + // Apply resolution scaling to EDRAM size. fsi_const_edram_size_dwords = builder_->makeUintConstant( - xenos::kEdramTileWidthSamples * xenos::kEdramTileHeightSamples * + xenos::kEdramTileWidthSamples * draw_resolution_scale_x_ * + xenos::kEdramTileHeightSamples * draw_resolution_scale_y_ * xenos::kEdramTileCount); for (uint32_t i = 0; i < 4; ++i) { fsi_samples_covered[i] = builder_->createBinOp( @@ -1449,10 +1450,12 @@ void SpirvShaderTranslator::FSI_LoadEdramOffsets(spv::Id msaa_samples) { // Get 40 x 16 x resolution scale 32bpp half-tile or 40x16 64bpp tile index. // Working with 40x16-sample portions for 64bpp and for swapping for depth - // dividing by 40, not by 80. - // TODO(Triang3l): Resolution scaling. - uint32_t tile_width = xenos::kEdramTileWidthSamples; + // Apply resolution scaling to tile dimensions. + uint32_t tile_width = + xenos::kEdramTileWidthSamples * draw_resolution_scale_x_; spv::Id const_tile_half_width = builder_->makeUintConstant(tile_width >> 1); - uint32_t tile_height = xenos::kEdramTileHeightSamples; + uint32_t tile_height = + xenos::kEdramTileHeightSamples * draw_resolution_scale_y_; spv::Id const_tile_height = builder_->makeUintConstant(tile_height); spv::Id tile_half_index[2], tile_half_sample_coordinates[2]; for (uint32_t i = 0; i < 2; ++i) { @@ -1565,8 +1568,9 @@ spv::Id SpirvShaderTranslator::FSI_AddSampleOffset(spv::Id sample_0_address, return sample_0_address; } spv::Id sample_offset; - // TODO(Triang3l): Resolution scaling. - uint32_t tile_width = xenos::kEdramTileWidthSamples; + // Apply resolution scaling to tile width. + uint32_t tile_width = + xenos::kEdramTileWidthSamples * draw_resolution_scale_x_; if (sample_index == 1) { sample_offset = builder_->makeIntConstant(tile_width); } else { diff --git a/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc b/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc index 99a8932c8..35e35e030 100644 --- a/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc +++ b/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc @@ -62,7 +62,9 @@ bool VulkanPipelineCache::Initialize() { SpirvShaderTranslator::Features(vulkan_device), render_target_cache_.msaa_2x_attachments_supported(), render_target_cache_.msaa_2x_no_attachments_supported(), - edram_fragment_shader_interlock); + edram_fragment_shader_interlock, + render_target_cache_.draw_resolution_scale_x(), + render_target_cache_.draw_resolution_scale_y()); if (edram_fragment_shader_interlock) { std::vector depth_only_fragment_shader_code = diff --git a/src/xenia/gpu/vulkan/vulkan_texture_cache.cc b/src/xenia/gpu/vulkan/vulkan_texture_cache.cc index 0bcdb464e..08aef7962 100644 --- a/src/xenia/gpu/vulkan/vulkan_texture_cache.cc +++ b/src/xenia/gpu/vulkan/vulkan_texture_cache.cc @@ -1280,7 +1280,6 @@ bool VulkanTextureCache::LoadTextureDataFromResidentMemoryImpl(Texture& texture, write_descriptor_set_dest.pTexelBufferView = nullptr; } // TODO(Triang3l): Use a single 512 MB shared memory binding if possible. - // TODO(Triang3l): Scaled resolve buffer bindings. // Aligning because if the data for a vector in a storage buffer is provided // partially, the value read may still be (0, 0, 0, 0), and small (especially // linear) textures won't be loaded correctly. @@ -1779,9 +1778,7 @@ VulkanTextureCache::VulkanTextureCache( : TextureCache(register_file, shared_memory, draw_resolution_scale_x, draw_resolution_scale_y), command_processor_(command_processor), - guest_shader_pipeline_stages_(guest_shader_pipeline_stages) { - // TODO(Triang3l): Support draw resolution scaling. -} + guest_shader_pipeline_stages_(guest_shader_pipeline_stages) {} bool VulkanTextureCache::Initialize() { const ui::vulkan::VulkanDevice* const vulkan_device =