This commit is contained in:
Heel 2025-09-03 15:15:13 +09:00 committed by GitHub
commit dbd3addcaf
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
12 changed files with 571 additions and 57 deletions

View file

@ -1931,8 +1931,7 @@ void SpirvShaderTranslator::StartFragmentShaderBeforeMain() {
}
void SpirvShaderTranslator::StartFragmentShaderInMain() {
// TODO(Triang3l): Allow memory export with resolution scaling only for the
// center host pixel, with sample shading (for depth format conversion) only
// TODO(Triang3l): With sample shading (for depth format conversion) only
// for the bottom-right sample (unlike in Direct3D, the sample mask input
// doesn't include covered samples of the primitive that correspond to other
// invocations, so use the sample that's the most friendly to the half-pixel
@ -2088,7 +2087,6 @@ void SpirvShaderTranslator::StartFragmentShaderInMain() {
// see the actual hardware instructions in both OpBitwiseXor and OpFNegate
// cases.
spv::Id const_sign_bit = builder_->makeUintConstant(UINT32_C(1) << 31);
// TODO(Triang3l): Resolution scale inversion.
// X - pixel X .0 in the magnitude, is back-facing in the sign bit.
assert_true(input_fragment_coordinates_ != spv::NoResult);
id_vector_temp_.clear();
@ -2102,6 +2100,12 @@ void SpirvShaderTranslator::StartFragmentShaderInMain() {
input_fragment_coordinates_,
id_vector_temp_),
spv::NoPrecision)));
// Apply resolution scale inversion after truncating.
if (draw_resolution_scale_x_ > 1) {
param_gen_x = builder_->createBinOp(
spv::OpFMul, type_float_, param_gen_x,
builder_->makeFloatConstant(1.0f / float(draw_resolution_scale_x_)));
}
if (!modification.pixel.param_gen_point) {
assert_true(input_front_facing_ != spv::NoResult);
param_gen_x = builder_->createTriOp(
@ -2137,6 +2141,12 @@ void SpirvShaderTranslator::StartFragmentShaderInMain() {
input_fragment_coordinates_,
id_vector_temp_),
spv::NoPrecision)));
// Apply resolution scale inversion after truncating.
if (draw_resolution_scale_y_ > 1) {
param_gen_y = builder_->createBinOp(
spv::OpFMul, type_float_, param_gen_y,
builder_->makeFloatConstant(1.0f / float(draw_resolution_scale_y_)));
}
if (modification.pixel.param_gen_point) {
param_gen_y = builder_->createUnaryOp(
spv::OpBitcast, type_float_,

View file

@ -349,11 +349,15 @@ class SpirvShaderTranslator : public ShaderTranslator {
SpirvShaderTranslator(const Features& features,
bool native_2x_msaa_with_attachments,
bool native_2x_msaa_no_attachments,
bool edram_fragment_shader_interlock)
bool edram_fragment_shader_interlock,
uint32_t draw_resolution_scale_x = 1,
uint32_t draw_resolution_scale_y = 1)
: features_(features),
native_2x_msaa_with_attachments_(native_2x_msaa_with_attachments),
native_2x_msaa_no_attachments_(native_2x_msaa_no_attachments),
edram_fragment_shader_interlock_(edram_fragment_shader_interlock) {}
edram_fragment_shader_interlock_(edram_fragment_shader_interlock),
draw_resolution_scale_x_(draw_resolution_scale_x),
draw_resolution_scale_y_(draw_resolution_scale_y) {}
uint64_t GetDefaultVertexShaderModification(
uint32_t dynamic_addressable_register_count,
@ -711,6 +715,8 @@ class SpirvShaderTranslator : public ShaderTranslator {
Features features_;
bool native_2x_msaa_with_attachments_;
bool native_2x_msaa_no_attachments_;
uint32_t draw_resolution_scale_x_;
uint32_t draw_resolution_scale_y_;
// For safety with different drivers (even though fragment shader interlock in
// SPIR-V only has one control flow requirement - that both begin and end must

View file

@ -1069,7 +1069,9 @@ void SpirvShaderTranslator::ProcessTextureFetchInstruction(
coordinates[coordinate_component_index] = coordinates_operand;
}
// TODO(Triang3l): Reverting the resolution scale.
// Resolution scale doesn't need reverting for texture weights - weights are
// calculated from fractional parts of coordinates which are
// scale-independent.
if (instr.opcode == ucode::FetchOpcode::kGetTextureWeights) {
// FIXME(Triang3l): Filtering modes should possibly be taken into account,

View file

@ -37,10 +37,70 @@ void SpirvShaderTranslator::ExportToMemory(uint8_t export_eM) {
// Check if memory export is allowed in this guest shader invocation.
std::optional<SpirvBuilder::IfBuilder> if_memexport_allowed;
if (main_memexport_allowed_ != spv::NoResult) {
if_memexport_allowed.emplace(main_memexport_allowed_,
spv::SelectionControlDontFlattenMask,
*builder_);
spv::Id memexport_allowed = main_memexport_allowed_;
// For pixel shaders with resolution scaling, only allow memory export from
// the center host pixel to avoid duplicate exports.
if (is_pixel_shader() &&
(draw_resolution_scale_x_ > 1 || draw_resolution_scale_y_ > 1)) {
assert_true(input_fragment_coordinates_ != spv::NoResult);
// Check if we're at the center pixel (scale/2 for both X and Y).
spv::Id is_center_pixel = builder_->makeBoolConstant(true);
// Check X coordinate.
if (draw_resolution_scale_x_ > 1) {
id_vector_temp_.clear();
id_vector_temp_.push_back(const_int_0_);
spv::Id pixel_x = builder_->createUnaryOp(
spv::OpConvertFToU, type_uint_,
builder_->createLoad(
builder_->createAccessChain(spv::StorageClassInput,
input_fragment_coordinates_,
id_vector_temp_),
spv::NoPrecision));
spv::Id pixel_x_remainder = builder_->createBinOp(
spv::OpUMod, type_uint_, pixel_x,
builder_->makeUintConstant(draw_resolution_scale_x_));
is_center_pixel = builder_->createBinOp(
spv::OpLogicalAnd, type_bool_, is_center_pixel,
builder_->createBinOp(
spv::OpIEqual, type_bool_, pixel_x_remainder,
builder_->makeUintConstant(draw_resolution_scale_x_ >> 1)));
}
// Check Y coordinate.
if (draw_resolution_scale_y_ > 1) {
id_vector_temp_.clear();
id_vector_temp_.push_back(builder_->makeIntConstant(1));
spv::Id pixel_y = builder_->createUnaryOp(
spv::OpConvertFToU, type_uint_,
builder_->createLoad(
builder_->createAccessChain(spv::StorageClassInput,
input_fragment_coordinates_,
id_vector_temp_),
spv::NoPrecision));
spv::Id pixel_y_remainder = builder_->createBinOp(
spv::OpUMod, type_uint_, pixel_y,
builder_->makeUintConstant(draw_resolution_scale_y_));
is_center_pixel = builder_->createBinOp(
spv::OpLogicalAnd, type_bool_, is_center_pixel,
builder_->createBinOp(
spv::OpIEqual, type_bool_, pixel_y_remainder,
builder_->makeUintConstant(draw_resolution_scale_y_ >> 1)));
}
// Combine with existing memexport_allowed condition.
memexport_allowed =
memexport_allowed != spv::NoResult
? builder_->createBinOp(spv::OpLogicalAnd, type_bool_,
memexport_allowed, is_center_pixel)
: is_center_pixel;
}
if (memexport_allowed != spv::NoResult) {
if_memexport_allowed.emplace(
memexport_allowed, spv::SelectionControlDontFlattenMask, *builder_);
}
// If the pixel was killed (but the actual killing on the SPIR-V side has not

View file

@ -765,9 +765,10 @@ void SpirvShaderTranslator::CompleteFragmentShaderInMain() {
fsi_color_targets_written =
builder_->createLoad(var_main_fsi_color_written_, spv::NoPrecision);
fsi_const_int_1 = builder_->makeIntConstant(1);
// TODO(Triang3l): Resolution scaling.
// Apply resolution scaling to EDRAM size.
fsi_const_edram_size_dwords = builder_->makeUintConstant(
xenos::kEdramTileWidthSamples * xenos::kEdramTileHeightSamples *
xenos::kEdramTileWidthSamples * draw_resolution_scale_x_ *
xenos::kEdramTileHeightSamples * draw_resolution_scale_y_ *
xenos::kEdramTileCount);
for (uint32_t i = 0; i < 4; ++i) {
fsi_samples_covered[i] = builder_->createBinOp(
@ -1449,10 +1450,12 @@ void SpirvShaderTranslator::FSI_LoadEdramOffsets(spv::Id msaa_samples) {
// Get 40 x 16 x resolution scale 32bpp half-tile or 40x16 64bpp tile index.
// Working with 40x16-sample portions for 64bpp and for swapping for depth -
// dividing by 40, not by 80.
// TODO(Triang3l): Resolution scaling.
uint32_t tile_width = xenos::kEdramTileWidthSamples;
// Apply resolution scaling to tile dimensions.
uint32_t tile_width =
xenos::kEdramTileWidthSamples * draw_resolution_scale_x_;
spv::Id const_tile_half_width = builder_->makeUintConstant(tile_width >> 1);
uint32_t tile_height = xenos::kEdramTileHeightSamples;
uint32_t tile_height =
xenos::kEdramTileHeightSamples * draw_resolution_scale_y_;
spv::Id const_tile_height = builder_->makeUintConstant(tile_height);
spv::Id tile_half_index[2], tile_half_sample_coordinates[2];
for (uint32_t i = 0; i < 2; ++i) {
@ -1565,8 +1568,9 @@ spv::Id SpirvShaderTranslator::FSI_AddSampleOffset(spv::Id sample_0_address,
return sample_0_address;
}
spv::Id sample_offset;
// TODO(Triang3l): Resolution scaling.
uint32_t tile_width = xenos::kEdramTileWidthSamples;
// Apply resolution scaling to tile width.
uint32_t tile_width =
xenos::kEdramTileWidthSamples * draw_resolution_scale_x_;
if (sample_index == 1) {
sample_offset = builder_->makeIntConstant(tile_width);
} else {

View file

@ -183,10 +183,15 @@ TextureCache::~TextureCache() {
bool TextureCache::GetConfigDrawResolutionScale(uint32_t& x_out,
uint32_t& y_out) {
uint32_t config_x =
uint32_t(std::max(INT32_C(1), cvars::draw_resolution_scale_x));
uint32_t config_y =
uint32_t(std::max(INT32_C(1), cvars::draw_resolution_scale_y));
// Clamp to valid range [1, max] to ensure safe conversion to uint32_t
int32_t config_x_signed =
std::clamp(cvars::draw_resolution_scale_x, INT32_C(1),
static_cast<int32_t>(kMaxDrawResolutionScaleAlongAxis));
int32_t config_y_signed =
std::clamp(cvars::draw_resolution_scale_y, INT32_C(1),
static_cast<int32_t>(kMaxDrawResolutionScaleAlongAxis));
uint32_t config_x = static_cast<uint32_t>(config_x_signed);
uint32_t config_y = static_cast<uint32_t>(config_y_signed);
uint32_t clamped_x = std::min(kMaxDrawResolutionScaleAlongAxis, config_x);
uint32_t clamped_y = std::min(kMaxDrawResolutionScaleAlongAxis, config_y);
x_out = clamped_x;

View file

@ -273,10 +273,13 @@ bool VulkanCommandProcessor::SetupContext() {
<< shared_memory_binding_count_log2;
// Requires the transient descriptor set layouts.
// TODO(Triang3l): Get the actual draw resolution scale when the texture cache
// supports resolution scaling.
// Get draw resolution scale using the same method as D3D12
uint32_t draw_resolution_scale_x, draw_resolution_scale_y;
TextureCache::GetConfigDrawResolutionScale(draw_resolution_scale_x,
draw_resolution_scale_y);
render_target_cache_ = std::make_unique<VulkanRenderTargetCache>(
*register_file_, *memory_, trace_writer_, 1, 1, *this);
*register_file_, *memory_, trace_writer_, draw_resolution_scale_x,
draw_resolution_scale_y, *this);
if (!render_target_cache_->Initialize(shared_memory_binding_count)) {
XELOGE("Failed to initialize the render target cache");
return false;
@ -339,10 +342,10 @@ bool VulkanCommandProcessor::SetupContext() {
}
// Requires the transient descriptor set layouts.
// TODO(Triang3l): Actual draw resolution scale.
texture_cache_ =
VulkanTextureCache::Create(*register_file_, *shared_memory_, 1, 1, *this,
guest_shader_pipeline_stages_);
// Use the same draw resolution scale as render target cache
texture_cache_ = VulkanTextureCache::Create(
*register_file_, *shared_memory_, draw_resolution_scale_x,
draw_resolution_scale_y, *this, guest_shader_pipeline_stages_);
if (!texture_cache_) {
XELOGE("Failed to initialize the texture cache");
return false;
@ -2442,15 +2445,19 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type,
// life. Or even disregard the viewport bounds range in the fragment shader
// interlocks case completely - apply the viewport and the scissor offset
// directly to pixel address and to things like ps_param_gen.
uint32_t draw_resolution_scale_x = texture_cache_->draw_resolution_scale_x();
uint32_t draw_resolution_scale_y = texture_cache_->draw_resolution_scale_y();
draw_util::GetHostViewportInfo(
regs, 1, 1, false, device_properties.maxViewportDimensions[0],
regs, draw_resolution_scale_x, draw_resolution_scale_y, false,
device_properties.maxViewportDimensions[0],
device_properties.maxViewportDimensions[1], true,
normalized_depth_control, false, host_render_targets_used,
pixel_shader && pixel_shader->writes_depth(), viewport_info);
// Update dynamic graphics pipeline state.
UpdateDynamicState(viewport_info, primitive_polygonal,
normalized_depth_control);
normalized_depth_control, draw_resolution_scale_x,
draw_resolution_scale_y);
auto vgt_draw_initiator = regs.Get<reg::VGT_DRAW_INITIATOR>();
@ -3243,7 +3250,8 @@ void VulkanCommandProcessor::DestroyScratchBuffer() {
void VulkanCommandProcessor::UpdateDynamicState(
const draw_util::ViewportInfo& viewport_info, bool primitive_polygonal,
reg::RB_DEPTHCONTROL normalized_depth_control) {
reg::RB_DEPTHCONTROL normalized_depth_control,
uint32_t draw_resolution_scale_x, uint32_t draw_resolution_scale_y) {
#if XE_GPU_FINE_GRAINED_DRAW_SCOPES
SCOPE_profile_cpu_f("gpu");
#endif // XE_GPU_FINE_GRAINED_DRAW_SCOPES
@ -3279,6 +3287,11 @@ void VulkanCommandProcessor::UpdateDynamicState(
// Scissor.
draw_util::Scissor scissor;
draw_util::GetScissor(regs, scissor);
// Scale the scissor to match the render target resolution scale
scissor.offset[0] *= draw_resolution_scale_x;
scissor.offset[1] *= draw_resolution_scale_y;
scissor.extent[0] *= draw_resolution_scale_x;
scissor.extent[1] *= draw_resolution_scale_y;
VkRect2D scissor_rect;
scissor_rect.offset.x = int32_t(scissor.offset[0]);
scissor_rect.offset.y = int32_t(scissor.offset[1]);

View file

@ -426,7 +426,9 @@ class VulkanCommandProcessor : public CommandProcessor {
void UpdateDynamicState(const draw_util::ViewportInfo& viewport_info,
bool primitive_polygonal,
reg::RB_DEPTHCONTROL normalized_depth_control);
reg::RB_DEPTHCONTROL normalized_depth_control,
uint32_t draw_resolution_scale_x,
uint32_t draw_resolution_scale_y);
void UpdateSystemConstantValues(
bool primitive_polygonal,
const PrimitiveProcessor::ProcessingResult& primitive_processing_result,

View file

@ -62,7 +62,9 @@ bool VulkanPipelineCache::Initialize() {
SpirvShaderTranslator::Features(vulkan_device),
render_target_cache_.msaa_2x_attachments_supported(),
render_target_cache_.msaa_2x_no_attachments_supported(),
edram_fragment_shader_interlock);
edram_fragment_shader_interlock,
render_target_cache_.draw_resolution_scale_x(),
render_target_cache_.draw_resolution_scale_y());
if (edram_fragment_shader_interlock) {
std::vector<uint8_t> depth_only_fragment_shader_code =

View file

@ -1067,6 +1067,13 @@ bool VulkanRenderTargetCache::Resolve(const Memory& memory,
uint32_t dump_pitch;
resolve_info.GetCopyEdramTileSpan(dump_base, dump_row_length_used,
dump_rows, dump_pitch);
// Scale tile parameters for resolution scaling to match resolve shader
// expectations
if (IsDrawResolutionScaled()) {
dump_row_length_used *= draw_resolution_scale_x();
dump_rows *= draw_resolution_scale_y();
dump_pitch *= draw_resolution_scale_x();
}
DumpRenderTargets(dump_base, dump_row_length_used, dump_rows, dump_pitch);
}
@ -1101,15 +1108,95 @@ bool VulkanRenderTargetCache::Resolve(const Memory& memory,
kStorageBufferCompute);
if (descriptor_set_dest != VK_NULL_HANDLE) {
// Write the destination descriptor.
// TODO(Triang3l): Scaled resolve buffer binding.
VkDescriptorBufferInfo write_descriptor_set_dest_buffer_info;
write_descriptor_set_dest_buffer_info.buffer = shared_memory.buffer();
bool scaled_buffer_ready = false;
if (draw_resolution_scaled) {
// For scaled resolve, ensure the scaled buffer exists and bind to
// it
uint32_t dest_address = resolve_info.copy_dest_base;
uint32_t dest_length = resolve_info.copy_dest_extent_start -
resolve_info.copy_dest_base +
resolve_info.copy_dest_extent_length;
// Ensure scaled resolve memory is committed
scaled_buffer_ready = true;
if (!texture_cache.EnsureScaledResolveMemoryCommittedPublic(
dest_address, dest_length)) {
XELOGE(
"Failed to commit scaled resolve memory for resolve dest at "
"0x{:08X}",
dest_address);
scaled_buffer_ready = false;
}
// Make the range current to get the buffer
if (scaled_buffer_ready &&
!texture_cache.MakeScaledResolveRangeCurrent(dest_address,
dest_length)) {
XELOGE(
"Failed to make scaled resolve range current for resolve "
"dest at 0x{:08X}",
dest_address);
scaled_buffer_ready = false;
}
// Get the current scaled buffer
VkBuffer scaled_buffer = VK_NULL_HANDLE;
if (scaled_buffer_ready) {
scaled_buffer = texture_cache.GetCurrentScaledResolveBuffer();
if (scaled_buffer == VK_NULL_HANDLE) {
XELOGE(
"No current scaled resolve buffer for resolve dest at "
"0x{:08X}",
dest_address);
scaled_buffer_ready = false;
}
}
if (scaled_buffer_ready) {
// Calculate offset within the scaled buffer
uint32_t draw_resolution_scale_area =
draw_resolution_scale_x() * draw_resolution_scale_y();
uint64_t scaled_offset =
uint64_t(dest_address) * draw_resolution_scale_area;
// Get the buffer's base offset to calculate relative offset
uint64_t buffer_relative_offset = 0;
size_t buffer_index =
texture_cache.GetScaledResolveCurrentBufferIndex();
auto* buffer_info =
texture_cache.GetScaledResolveBufferInfo(buffer_index);
if (buffer_info) {
buffer_relative_offset =
scaled_offset - buffer_info->range_start_scaled;
}
write_descriptor_set_dest_buffer_info.buffer = scaled_buffer;
write_descriptor_set_dest_buffer_info.offset =
buffer_relative_offset;
write_descriptor_set_dest_buffer_info.range =
dest_length * draw_resolution_scale_area;
}
}
if (!scaled_buffer_ready) {
// Regular unscaled resolve - write to shared memory
if (draw_resolution_scaled) {
XELOGW(
"Falling back to unscaled resolve at 0x{:08X} - scaled "
"buffer not available",
resolve_info.copy_dest_base);
}
write_descriptor_set_dest_buffer_info.buffer =
shared_memory.buffer();
write_descriptor_set_dest_buffer_info.offset =
resolve_info.copy_dest_base;
write_descriptor_set_dest_buffer_info.range =
resolve_info.copy_dest_extent_start -
resolve_info.copy_dest_base +
resolve_info.copy_dest_extent_length;
}
VkWriteDescriptorSet write_descriptor_set_dest;
write_descriptor_set_dest.sType =
VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
@ -1128,11 +1215,37 @@ bool VulkanRenderTargetCache::Resolve(const Memory& memory,
nullptr);
// Submit the resolve.
// TODO(Triang3l): Transition the scaled resolve buffer.
if (!scaled_buffer_ready) {
// Regular unscaled - transition shared memory for write
shared_memory.Use(VulkanSharedMemory::Usage::kComputeWrite,
std::pair<uint32_t, uint32_t>(
resolve_info.copy_dest_extent_start,
resolve_info.copy_dest_extent_length));
} else {
// Scaled - add barrier for the scaled resolve buffer
// The buffer transitions from compute shader read (texture loading)
// to compute shader write
VkBuffer scaled_buffer =
texture_cache.GetCurrentScaledResolveBuffer();
if (scaled_buffer != VK_NULL_HANDLE) {
VkBufferMemoryBarrier buffer_barrier = {};
buffer_barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
// More specific: previous compute shader reads to compute shader
// write
buffer_barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT;
buffer_barrier.dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
buffer_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
buffer_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
buffer_barrier.buffer = scaled_buffer;
buffer_barrier.offset = 0;
buffer_barrier.size = VK_WHOLE_SIZE;
command_buffer.CmdVkPipelineBarrier(
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, // From compute shader
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, // To compute shader
0, 0, nullptr, 1, &buffer_barrier, 0, nullptr);
}
}
UseEdramBuffer(EdramBufferUsage::kComputeRead);
command_processor_.BindExternalComputePipeline(
resolve_copy_pipelines_[size_t(copy_shader)]);
@ -1163,6 +1276,28 @@ bool VulkanRenderTargetCache::Resolve(const Memory& memory,
command_buffer.CmdVkDispatch(copy_group_count_x, copy_group_count_y,
1);
// Add barrier after writing to scaled resolve buffer
if (scaled_buffer_ready) {
VkBuffer scaled_buffer =
texture_cache.GetCurrentScaledResolveBuffer();
if (scaled_buffer != VK_NULL_HANDLE) {
VkBufferMemoryBarrier buffer_barrier = {};
buffer_barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
buffer_barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
buffer_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
buffer_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
buffer_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
buffer_barrier.buffer = scaled_buffer;
buffer_barrier.offset = 0;
buffer_barrier.size = VK_WHOLE_SIZE;
command_buffer.CmdVkPipelineBarrier(
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, nullptr, 1,
&buffer_barrier, 0, nullptr);
}
}
// Invalidate textures and mark the range as scaled if needed.
texture_cache.MarkRangeAsResolved(
resolve_info.copy_dest_extent_start,

View file

@ -477,6 +477,16 @@ VulkanTextureCache::~VulkanTextureCache() {
// textures before destroying VMA.
DestroyAllTextures(true);
// Clean up scaled resolve buffers before destroying VMA
// The command processor should ensure all GPU operations are complete
// before the texture cache is destroyed
for (ScaledResolveBuffer& buffer : scaled_resolve_buffers_) {
if (buffer.buffer != VK_NULL_HANDLE) {
vmaDestroyBuffer(vma_allocator_, buffer.buffer, buffer.allocation);
}
}
scaled_resolve_buffers_.clear();
if (vma_allocator_ != VK_NULL_HANDLE) {
vmaDestroyAllocator(vma_allocator_);
}
@ -896,6 +906,7 @@ VkImageView VulkanTextureCache::RequestSwapTexture(
return VK_NULL_HANDLE;
}
if (!LoadTextureData(*texture)) {
XELOGE("Failed to load texture data for swap texture");
return VK_NULL_HANDLE;
}
texture->MarkAsUsed();
@ -925,6 +936,13 @@ VkImageView VulkanTextureCache::RequestSwapTexture(
return texture_view;
}
bool VulkanTextureCache::IsScaledResolveSupportedForFormat(
    TextureKey key) const {
  // Scaled resolve data can only be loaded for guest formats that map to some
  // host format - an undefined unsigned format means there's no mapping.
  return GetHostFormatPair(key).format_unsigned.format != VK_FORMAT_UNDEFINED;
}
bool VulkanTextureCache::IsSignedVersionSeparateForFormat(
TextureKey key) const {
const HostFormatPair& host_format_pair = GetHostFormatPair(key);
@ -1262,7 +1280,6 @@ bool VulkanTextureCache::LoadTextureDataFromResidentMemoryImpl(Texture& texture,
write_descriptor_set_dest.pTexelBufferView = nullptr;
}
// TODO(Triang3l): Use a single 512 MB shared memory binding if possible.
// TODO(Triang3l): Scaled resolve buffer bindings.
// Aligning because if the data for a vector in a storage buffer is provided
// partially, the value read may still be (0, 0, 0, 0), and small (especially
// linear) textures won't be loaded correctly.
@ -1280,12 +1297,69 @@ bool VulkanTextureCache::LoadTextureDataFromResidentMemoryImpl(Texture& texture,
if (!descriptor_set_source_base) {
return false;
}
if (texture_key.scaled_resolve) {
// For scaled textures, read from scaled resolve buffers
uint32_t guest_address = texture_key.base_page << 12;
uint32_t guest_size = vulkan_texture.GetGuestBaseSize();
// Ensure the scaled buffer exists
if (EnsureScaledResolveMemoryCommitted(guest_address, guest_size)) {
// Make the range current
if (MakeScaledResolveRangeCurrent(guest_address, guest_size)) {
VkBuffer scaled_buffer = GetCurrentScaledResolveBuffer();
if (scaled_buffer != VK_NULL_HANDLE) {
// Calculate offset within the scaled buffer
uint32_t draw_resolution_scale_area =
draw_resolution_scale_x() * draw_resolution_scale_y();
uint64_t scaled_offset =
uint64_t(guest_address) * draw_resolution_scale_area;
uint64_t buffer_relative_offset = 0;
if (scaled_resolve_current_buffer_index_ <
scaled_resolve_buffers_.size()) {
const ScaledResolveBuffer& current_buffer =
scaled_resolve_buffers_[scaled_resolve_current_buffer_index_];
buffer_relative_offset =
scaled_offset - current_buffer.range_start_scaled;
}
write_descriptor_set_source_base_buffer_info.buffer = scaled_buffer;
write_descriptor_set_source_base_buffer_info.offset =
buffer_relative_offset;
write_descriptor_set_source_base_buffer_info.range =
xe::align(guest_size * draw_resolution_scale_area,
source_length_alignment);
} else {
XELOGE(
"Scaled resolve texture load: Failed to get current scaled "
"buffer for texture at 0x{:08X}",
guest_address);
return false;
}
} else {
XELOGE(
"Scaled resolve texture load: Failed to make range current for "
"texture at 0x{:08X}",
guest_address);
return false;
}
} else {
XELOGE(
"Scaled resolve texture load: Failed to ensure scaled memory for "
"texture at 0x{:08X}",
guest_address);
return false;
}
} else {
// Regular unscaled texture - use shared memory
write_descriptor_set_source_base_buffer_info.buffer =
vulkan_shared_memory.buffer();
write_descriptor_set_source_base_buffer_info.offset = texture_key.base_page
<< 12;
write_descriptor_set_source_base_buffer_info.offset =
texture_key.base_page << 12;
write_descriptor_set_source_base_buffer_info.range =
xe::align(vulkan_texture.GetGuestBaseSize(), source_length_alignment);
}
VkWriteDescriptorSet& write_descriptor_set_source_base =
write_descriptor_sets[write_descriptor_set_count++];
write_descriptor_set_source_base.sType =
@ -1310,6 +1384,10 @@ bool VulkanTextureCache::LoadTextureDataFromResidentMemoryImpl(Texture& texture,
if (!descriptor_set_source_mips) {
return false;
}
// TODO: Implement scaled mips support similar to D3D12.
// Currently mips are always loaded from unscaled shared memory even when
// the base texture is scaled. D3D12 properly handles scaled mips in
// D3D12TextureCache::LoadTextureDataFromResidentMemoryImpl.
write_descriptor_set_source_mips_buffer_info.buffer =
vulkan_shared_memory.buffer();
write_descriptor_set_source_mips_buffer_info.offset = texture_key.mip_page
@ -1700,10 +1778,7 @@ VulkanTextureCache::VulkanTextureCache(
: TextureCache(register_file, shared_memory, draw_resolution_scale_x,
draw_resolution_scale_y),
command_processor_(command_processor),
guest_shader_pipeline_stages_(guest_shader_pipeline_stages) {
// TODO(Triang3l): Support draw resolution scaling.
assert_true(draw_resolution_scale_x == 1 && draw_resolution_scale_y == 1);
}
guest_shader_pipeline_stages_(guest_shader_pipeline_stages) {}
bool VulkanTextureCache::Initialize() {
const ui::vulkan::VulkanDevice* const vulkan_device =
@ -2656,6 +2731,160 @@ xenos::ClampMode VulkanTextureCache::NormalizeClampMode(
return clamp_mode;
}
bool VulkanTextureCache::EnsureScaledResolveMemoryCommitted(
uint32_t start_unscaled, uint32_t length_unscaled,
uint32_t length_scaled_alignment_log2) {
if (!IsDrawResolutionScaled()) {
return true;
}
if (length_unscaled == 0) {
return true;
}
if (start_unscaled > SharedMemory::kBufferSize ||
(SharedMemory::kBufferSize - start_unscaled) < length_unscaled) {
return false;
}
uint32_t draw_resolution_scale_area =
draw_resolution_scale_x() * draw_resolution_scale_y();
uint64_t start_scaled = uint64_t(start_unscaled) * draw_resolution_scale_area;
uint64_t length_scaled_alignment_bits =
(UINT64_C(1) << length_scaled_alignment_log2) - 1;
uint64_t length_scaled =
(uint64_t(length_unscaled) * draw_resolution_scale_area +
length_scaled_alignment_bits) &
~length_scaled_alignment_bits;
// Check if any existing buffer covers this range
bool range_covered = false;
for (const ScaledResolveBuffer& buffer : scaled_resolve_buffers_) {
if (buffer.range_start_scaled <= start_scaled &&
(buffer.range_start_scaled + buffer.range_length_scaled) >=
(start_scaled + length_scaled)) {
// This buffer covers the requested range
scaled_resolve_current_range_start_scaled_ = buffer.range_start_scaled;
scaled_resolve_current_range_length_scaled_ = buffer.range_length_scaled;
range_covered = true;
break;
}
}
if (!range_covered) {
// Need to create a new buffer or extend an existing one
// For simplicity and to avoid fragmentation, we'll use a fixed-size buffer
// approach similar to D3D12 (but smaller - 256MB chunks instead of 2GB)
constexpr uint64_t kBufferSize = 256 * 1024 * 1024; // 256MB per buffer
// Round up the range to cover complete buffer chunks
uint64_t buffer_start = (start_scaled / kBufferSize) * kBufferSize;
uint64_t buffer_end =
((start_scaled + length_scaled + kBufferSize - 1) / kBufferSize) *
kBufferSize;
uint64_t buffer_size = buffer_end - buffer_start;
// Check again if this expanded range is covered
bool expanded_range_covered = false;
for (const ScaledResolveBuffer& buffer : scaled_resolve_buffers_) {
if (buffer.range_start_scaled <= buffer_start &&
(buffer.range_start_scaled + buffer.range_length_scaled) >=
buffer_end) {
scaled_resolve_current_range_start_scaled_ = buffer.range_start_scaled;
scaled_resolve_current_range_length_scaled_ =
buffer.range_length_scaled;
expanded_range_covered = true;
break;
}
}
if (!expanded_range_covered) {
// Limit the number of buffers to prevent unbounded growth
constexpr size_t kMaxBuffers = 32; // Maximum 8GB total (32 * 256MB)
if (scaled_resolve_buffers_.size() >= kMaxBuffers) {
// Reuse the least recently used buffer
// For now, just reuse the first buffer (simple LRU would be better)
ScaledResolveBuffer& reused_buffer = scaled_resolve_buffers_[0];
reused_buffer.range_start_scaled = buffer_start;
reused_buffer.range_length_scaled = buffer_size;
scaled_resolve_current_range_start_scaled_ = buffer_start;
scaled_resolve_current_range_length_scaled_ = buffer_size;
} else {
ScaledResolveBuffer new_buffer;
new_buffer.size = buffer_size;
VkBufferCreateInfo buffer_create_info = {};
buffer_create_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
buffer_create_info.size = new_buffer.size;
buffer_create_info.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
buffer_create_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
VmaAllocationCreateInfo allocation_create_info = {};
allocation_create_info.usage = VMA_MEMORY_USAGE_GPU_ONLY;
VkResult result = vmaCreateBuffer(
vma_allocator_, &buffer_create_info, &allocation_create_info,
&new_buffer.buffer, &new_buffer.allocation, nullptr);
if (result != VK_SUCCESS) {
XELOGE(
"VulkanTextureCache: Failed to create scaled resolve buffer: {}",
static_cast<int>(result));
return false;
}
new_buffer.range_start_scaled = buffer_start;
new_buffer.range_length_scaled = buffer_size;
scaled_resolve_buffers_.push_back(new_buffer);
scaled_resolve_current_range_start_scaled_ = buffer_start;
scaled_resolve_current_range_length_scaled_ = buffer_size;
}
}
}
return true;
}
bool VulkanTextureCache::MakeScaledResolveRangeCurrent(
uint32_t start_unscaled, uint32_t length_unscaled,
uint32_t length_scaled_alignment_log2) {
if (!IsDrawResolutionScaled()) {
return false;
}
// First ensure the memory is committed (creates buffers if needed)
if (!EnsureScaledResolveMemoryCommitted(start_unscaled, length_unscaled,
length_scaled_alignment_log2)) {
return false;
}
uint32_t draw_resolution_scale_area =
draw_resolution_scale_x() * draw_resolution_scale_y();
uint64_t start_scaled = uint64_t(start_unscaled) * draw_resolution_scale_area;
// Find which buffer contains this range
for (size_t i = 0; i < scaled_resolve_buffers_.size(); ++i) {
const ScaledResolveBuffer& buffer = scaled_resolve_buffers_[i];
if (start_scaled >= buffer.range_start_scaled &&
start_scaled <
(buffer.range_start_scaled + buffer.range_length_scaled)) {
scaled_resolve_current_buffer_index_ = i;
return true;
}
}
return false;
}
VkBuffer VulkanTextureCache::GetCurrentScaledResolveBuffer() const {
  // VK_NULL_HANDLE when no range has been made current yet (index is
  // SIZE_MAX initially) or the index is otherwise out of bounds.
  size_t current_index = scaled_resolve_current_buffer_index_;
  return current_index < scaled_resolve_buffers_.size()
             ? scaled_resolve_buffers_[current_index].buffer
             : VK_NULL_HANDLE;
}
} // namespace vulkan
} // namespace gpu
} // namespace xe

View file

@ -121,7 +121,42 @@ class VulkanTextureCache final : public TextureCache {
uint32_t& height_scaled_out,
xenos::TextureFormat& format_out);
// Scaled resolve buffer management (for use by VulkanRenderTargetCache)
// One GPU buffer chunk backing a contiguous portion of the scaled resolve
// address space (guest shared-memory addresses multiplied by the resolution
// scale area).
struct ScaledResolveBuffer {
  VkBuffer buffer = VK_NULL_HANDLE;
  // VMA allocation owning the buffer's memory; destroyed together with it.
  VmaAllocation allocation = VK_NULL_HANDLE;
  // Size of the VkBuffer itself, in bytes.
  uint64_t size = 0;
  // First byte of the scaled address space this chunk is tagged to cover.
  uint64_t range_start_scaled = 0;
  // Length, in bytes, of the covered scaled range.
  uint64_t range_length_scaled = 0;
};
// Public scaled resolve buffer methods for use by VulkanRenderTargetCache
// Public wrapper over the protected EnsureScaledResolveMemoryCommitted
// override, for use by VulkanRenderTargetCache when binding the scaled
// resolve destination during resolves.
bool EnsureScaledResolveMemoryCommittedPublic(
    uint32_t start_unscaled, uint32_t length_unscaled,
    uint32_t length_scaled_alignment_log2 = 0) {
  return EnsureScaledResolveMemoryCommitted(start_unscaled, length_unscaled,
                                            length_scaled_alignment_log2);
}
bool MakeScaledResolveRangeCurrent(uint32_t start_unscaled,
uint32_t length_unscaled,
uint32_t length_scaled_alignment_log2 = 0);
VkBuffer GetCurrentScaledResolveBuffer() const;
// Index, into the scaled resolve buffer list, of the buffer selected by the
// latest successful MakeScaledResolveRangeCurrent call (SIZE_MAX if none).
size_t GetScaledResolveCurrentBufferIndex() const {
  return scaled_resolve_current_buffer_index_;
}
// Returns the descriptor of the scaled resolve buffer at the given index, or
// nullptr for an out-of-range index.
const ScaledResolveBuffer* GetScaledResolveBufferInfo(size_t index) const {
  return index < scaled_resolve_buffers_.size()
             ? &scaled_resolve_buffers_[index]
             : nullptr;
}
protected:
bool IsScaledResolveSupportedForFormat(TextureKey key) const override;
bool IsSignedVersionSeparateForFormat(TextureKey key) const override;
uint32_t GetHostFormatSwizzle(TextureKey key) const override;
@ -135,6 +170,10 @@ class VulkanTextureCache final : public TextureCache {
bool LoadTextureDataFromResidentMemoryImpl(Texture& texture, bool load_base,
bool load_mips) override;
bool EnsureScaledResolveMemoryCommitted(
uint32_t start_unscaled, uint32_t length_unscaled,
uint32_t length_scaled_alignment_log2 = 0) override;
void UpdateTextureBindingsImpl(uint32_t fetch_constant_mask) override;
private:
@ -352,6 +391,13 @@ class VulkanTextureCache final : public TextureCache {
samplers_;
std::pair<const SamplerParameters, Sampler>* sampler_used_first_ = nullptr;
std::pair<const SamplerParameters, Sampler>* sampler_used_last_ = nullptr;
// Scaled resolve buffer storage
std::vector<ScaledResolveBuffer> scaled_resolve_buffers_;
// Current scaled resolve range tracking
uint64_t scaled_resolve_current_range_start_scaled_ = 0;
uint64_t scaled_resolve_current_range_length_scaled_ = 0;
size_t scaled_resolve_current_buffer_index_ = SIZE_MAX;
};
} // namespace vulkan