mirror of
https://github.com/xenia-project/xenia.git
synced 2025-12-06 07:12:03 +01:00
Merge 7942d680df into 01ae24e46e
This commit is contained in:
commit
dbd3addcaf
|
|
@ -1931,8 +1931,7 @@ void SpirvShaderTranslator::StartFragmentShaderBeforeMain() {
|
||||||
}
|
}
|
||||||
|
|
||||||
void SpirvShaderTranslator::StartFragmentShaderInMain() {
|
void SpirvShaderTranslator::StartFragmentShaderInMain() {
|
||||||
// TODO(Triang3l): Allow memory export with resolution scaling only for the
|
// TODO(Triang3l): With sample shading (for depth format conversion) only
|
||||||
// center host pixel, with sample shading (for depth format conversion) only
|
|
||||||
// for the bottom-right sample (unlike in Direct3D, the sample mask input
|
// for the bottom-right sample (unlike in Direct3D, the sample mask input
|
||||||
// doesn't include covered samples of the primitive that correspond to other
|
// doesn't include covered samples of the primitive that correspond to other
|
||||||
// invocations, so use the sample that's the most friendly to the half-pixel
|
// invocations, so use the sample that's the most friendly to the half-pixel
|
||||||
|
|
@ -2088,7 +2087,6 @@ void SpirvShaderTranslator::StartFragmentShaderInMain() {
|
||||||
// see the actual hardware instructions in both OpBitwiseXor and OpFNegate
|
// see the actual hardware instructions in both OpBitwiseXor and OpFNegate
|
||||||
// cases.
|
// cases.
|
||||||
spv::Id const_sign_bit = builder_->makeUintConstant(UINT32_C(1) << 31);
|
spv::Id const_sign_bit = builder_->makeUintConstant(UINT32_C(1) << 31);
|
||||||
// TODO(Triang3l): Resolution scale inversion.
|
|
||||||
// X - pixel X .0 in the magnitude, is back-facing in the sign bit.
|
// X - pixel X .0 in the magnitude, is back-facing in the sign bit.
|
||||||
assert_true(input_fragment_coordinates_ != spv::NoResult);
|
assert_true(input_fragment_coordinates_ != spv::NoResult);
|
||||||
id_vector_temp_.clear();
|
id_vector_temp_.clear();
|
||||||
|
|
@ -2102,6 +2100,12 @@ void SpirvShaderTranslator::StartFragmentShaderInMain() {
|
||||||
input_fragment_coordinates_,
|
input_fragment_coordinates_,
|
||||||
id_vector_temp_),
|
id_vector_temp_),
|
||||||
spv::NoPrecision)));
|
spv::NoPrecision)));
|
||||||
|
// Apply resolution scale inversion after truncating.
|
||||||
|
if (draw_resolution_scale_x_ > 1) {
|
||||||
|
param_gen_x = builder_->createBinOp(
|
||||||
|
spv::OpFMul, type_float_, param_gen_x,
|
||||||
|
builder_->makeFloatConstant(1.0f / float(draw_resolution_scale_x_)));
|
||||||
|
}
|
||||||
if (!modification.pixel.param_gen_point) {
|
if (!modification.pixel.param_gen_point) {
|
||||||
assert_true(input_front_facing_ != spv::NoResult);
|
assert_true(input_front_facing_ != spv::NoResult);
|
||||||
param_gen_x = builder_->createTriOp(
|
param_gen_x = builder_->createTriOp(
|
||||||
|
|
@ -2137,6 +2141,12 @@ void SpirvShaderTranslator::StartFragmentShaderInMain() {
|
||||||
input_fragment_coordinates_,
|
input_fragment_coordinates_,
|
||||||
id_vector_temp_),
|
id_vector_temp_),
|
||||||
spv::NoPrecision)));
|
spv::NoPrecision)));
|
||||||
|
// Apply resolution scale inversion after truncating.
|
||||||
|
if (draw_resolution_scale_y_ > 1) {
|
||||||
|
param_gen_y = builder_->createBinOp(
|
||||||
|
spv::OpFMul, type_float_, param_gen_y,
|
||||||
|
builder_->makeFloatConstant(1.0f / float(draw_resolution_scale_y_)));
|
||||||
|
}
|
||||||
if (modification.pixel.param_gen_point) {
|
if (modification.pixel.param_gen_point) {
|
||||||
param_gen_y = builder_->createUnaryOp(
|
param_gen_y = builder_->createUnaryOp(
|
||||||
spv::OpBitcast, type_float_,
|
spv::OpBitcast, type_float_,
|
||||||
|
|
|
||||||
|
|
@ -349,11 +349,15 @@ class SpirvShaderTranslator : public ShaderTranslator {
|
||||||
SpirvShaderTranslator(const Features& features,
|
SpirvShaderTranslator(const Features& features,
|
||||||
bool native_2x_msaa_with_attachments,
|
bool native_2x_msaa_with_attachments,
|
||||||
bool native_2x_msaa_no_attachments,
|
bool native_2x_msaa_no_attachments,
|
||||||
bool edram_fragment_shader_interlock)
|
bool edram_fragment_shader_interlock,
|
||||||
|
uint32_t draw_resolution_scale_x = 1,
|
||||||
|
uint32_t draw_resolution_scale_y = 1)
|
||||||
: features_(features),
|
: features_(features),
|
||||||
native_2x_msaa_with_attachments_(native_2x_msaa_with_attachments),
|
native_2x_msaa_with_attachments_(native_2x_msaa_with_attachments),
|
||||||
native_2x_msaa_no_attachments_(native_2x_msaa_no_attachments),
|
native_2x_msaa_no_attachments_(native_2x_msaa_no_attachments),
|
||||||
edram_fragment_shader_interlock_(edram_fragment_shader_interlock) {}
|
edram_fragment_shader_interlock_(edram_fragment_shader_interlock),
|
||||||
|
draw_resolution_scale_x_(draw_resolution_scale_x),
|
||||||
|
draw_resolution_scale_y_(draw_resolution_scale_y) {}
|
||||||
|
|
||||||
uint64_t GetDefaultVertexShaderModification(
|
uint64_t GetDefaultVertexShaderModification(
|
||||||
uint32_t dynamic_addressable_register_count,
|
uint32_t dynamic_addressable_register_count,
|
||||||
|
|
@ -711,6 +715,8 @@ class SpirvShaderTranslator : public ShaderTranslator {
|
||||||
Features features_;
|
Features features_;
|
||||||
bool native_2x_msaa_with_attachments_;
|
bool native_2x_msaa_with_attachments_;
|
||||||
bool native_2x_msaa_no_attachments_;
|
bool native_2x_msaa_no_attachments_;
|
||||||
|
uint32_t draw_resolution_scale_x_;
|
||||||
|
uint32_t draw_resolution_scale_y_;
|
||||||
|
|
||||||
// For safety with different drivers (even though fragment shader interlock in
|
// For safety with different drivers (even though fragment shader interlock in
|
||||||
// SPIR-V only has one control flow requirement - that both begin and end must
|
// SPIR-V only has one control flow requirement - that both begin and end must
|
||||||
|
|
|
||||||
|
|
@ -1069,7 +1069,9 @@ void SpirvShaderTranslator::ProcessTextureFetchInstruction(
|
||||||
coordinates[coordinate_component_index] = coordinates_operand;
|
coordinates[coordinate_component_index] = coordinates_operand;
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO(Triang3l): Reverting the resolution scale.
|
// Resolution scale doesn't need reverting for texture weights - weights are
|
||||||
|
// calculated from fractional parts of coordinates which are
|
||||||
|
// scale-independent.
|
||||||
|
|
||||||
if (instr.opcode == ucode::FetchOpcode::kGetTextureWeights) {
|
if (instr.opcode == ucode::FetchOpcode::kGetTextureWeights) {
|
||||||
// FIXME(Triang3l): Filtering modes should possibly be taken into account,
|
// FIXME(Triang3l): Filtering modes should possibly be taken into account,
|
||||||
|
|
|
||||||
|
|
@ -37,10 +37,70 @@ void SpirvShaderTranslator::ExportToMemory(uint8_t export_eM) {
|
||||||
|
|
||||||
// Check if memory export is allowed in this guest shader invocation.
|
// Check if memory export is allowed in this guest shader invocation.
|
||||||
std::optional<SpirvBuilder::IfBuilder> if_memexport_allowed;
|
std::optional<SpirvBuilder::IfBuilder> if_memexport_allowed;
|
||||||
if (main_memexport_allowed_ != spv::NoResult) {
|
spv::Id memexport_allowed = main_memexport_allowed_;
|
||||||
if_memexport_allowed.emplace(main_memexport_allowed_,
|
|
||||||
spv::SelectionControlDontFlattenMask,
|
// For pixel shaders with resolution scaling, only allow memory export from
|
||||||
*builder_);
|
// the center host pixel to avoid duplicate exports.
|
||||||
|
if (is_pixel_shader() &&
|
||||||
|
(draw_resolution_scale_x_ > 1 || draw_resolution_scale_y_ > 1)) {
|
||||||
|
assert_true(input_fragment_coordinates_ != spv::NoResult);
|
||||||
|
|
||||||
|
// Check if we're at the center pixel (scale/2 for both X and Y).
|
||||||
|
spv::Id is_center_pixel = builder_->makeBoolConstant(true);
|
||||||
|
|
||||||
|
// Check X coordinate.
|
||||||
|
if (draw_resolution_scale_x_ > 1) {
|
||||||
|
id_vector_temp_.clear();
|
||||||
|
id_vector_temp_.push_back(const_int_0_);
|
||||||
|
spv::Id pixel_x = builder_->createUnaryOp(
|
||||||
|
spv::OpConvertFToU, type_uint_,
|
||||||
|
builder_->createLoad(
|
||||||
|
builder_->createAccessChain(spv::StorageClassInput,
|
||||||
|
input_fragment_coordinates_,
|
||||||
|
id_vector_temp_),
|
||||||
|
spv::NoPrecision));
|
||||||
|
spv::Id pixel_x_remainder = builder_->createBinOp(
|
||||||
|
spv::OpUMod, type_uint_, pixel_x,
|
||||||
|
builder_->makeUintConstant(draw_resolution_scale_x_));
|
||||||
|
is_center_pixel = builder_->createBinOp(
|
||||||
|
spv::OpLogicalAnd, type_bool_, is_center_pixel,
|
||||||
|
builder_->createBinOp(
|
||||||
|
spv::OpIEqual, type_bool_, pixel_x_remainder,
|
||||||
|
builder_->makeUintConstant(draw_resolution_scale_x_ >> 1)));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check Y coordinate.
|
||||||
|
if (draw_resolution_scale_y_ > 1) {
|
||||||
|
id_vector_temp_.clear();
|
||||||
|
id_vector_temp_.push_back(builder_->makeIntConstant(1));
|
||||||
|
spv::Id pixel_y = builder_->createUnaryOp(
|
||||||
|
spv::OpConvertFToU, type_uint_,
|
||||||
|
builder_->createLoad(
|
||||||
|
builder_->createAccessChain(spv::StorageClassInput,
|
||||||
|
input_fragment_coordinates_,
|
||||||
|
id_vector_temp_),
|
||||||
|
spv::NoPrecision));
|
||||||
|
spv::Id pixel_y_remainder = builder_->createBinOp(
|
||||||
|
spv::OpUMod, type_uint_, pixel_y,
|
||||||
|
builder_->makeUintConstant(draw_resolution_scale_y_));
|
||||||
|
is_center_pixel = builder_->createBinOp(
|
||||||
|
spv::OpLogicalAnd, type_bool_, is_center_pixel,
|
||||||
|
builder_->createBinOp(
|
||||||
|
spv::OpIEqual, type_bool_, pixel_y_remainder,
|
||||||
|
builder_->makeUintConstant(draw_resolution_scale_y_ >> 1)));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Combine with existing memexport_allowed condition.
|
||||||
|
memexport_allowed =
|
||||||
|
memexport_allowed != spv::NoResult
|
||||||
|
? builder_->createBinOp(spv::OpLogicalAnd, type_bool_,
|
||||||
|
memexport_allowed, is_center_pixel)
|
||||||
|
: is_center_pixel;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (memexport_allowed != spv::NoResult) {
|
||||||
|
if_memexport_allowed.emplace(
|
||||||
|
memexport_allowed, spv::SelectionControlDontFlattenMask, *builder_);
|
||||||
}
|
}
|
||||||
|
|
||||||
// If the pixel was killed (but the actual killing on the SPIR-V side has not
|
// If the pixel was killed (but the actual killing on the SPIR-V side has not
|
||||||
|
|
|
||||||
|
|
@ -765,9 +765,10 @@ void SpirvShaderTranslator::CompleteFragmentShaderInMain() {
|
||||||
fsi_color_targets_written =
|
fsi_color_targets_written =
|
||||||
builder_->createLoad(var_main_fsi_color_written_, spv::NoPrecision);
|
builder_->createLoad(var_main_fsi_color_written_, spv::NoPrecision);
|
||||||
fsi_const_int_1 = builder_->makeIntConstant(1);
|
fsi_const_int_1 = builder_->makeIntConstant(1);
|
||||||
// TODO(Triang3l): Resolution scaling.
|
// Apply resolution scaling to EDRAM size.
|
||||||
fsi_const_edram_size_dwords = builder_->makeUintConstant(
|
fsi_const_edram_size_dwords = builder_->makeUintConstant(
|
||||||
xenos::kEdramTileWidthSamples * xenos::kEdramTileHeightSamples *
|
xenos::kEdramTileWidthSamples * draw_resolution_scale_x_ *
|
||||||
|
xenos::kEdramTileHeightSamples * draw_resolution_scale_y_ *
|
||||||
xenos::kEdramTileCount);
|
xenos::kEdramTileCount);
|
||||||
for (uint32_t i = 0; i < 4; ++i) {
|
for (uint32_t i = 0; i < 4; ++i) {
|
||||||
fsi_samples_covered[i] = builder_->createBinOp(
|
fsi_samples_covered[i] = builder_->createBinOp(
|
||||||
|
|
@ -1449,10 +1450,12 @@ void SpirvShaderTranslator::FSI_LoadEdramOffsets(spv::Id msaa_samples) {
|
||||||
// Get 40 x 16 x resolution scale 32bpp half-tile or 40x16 64bpp tile index.
|
// Get 40 x 16 x resolution scale 32bpp half-tile or 40x16 64bpp tile index.
|
||||||
// Working with 40x16-sample portions for 64bpp and for swapping for depth -
|
// Working with 40x16-sample portions for 64bpp and for swapping for depth -
|
||||||
// dividing by 40, not by 80.
|
// dividing by 40, not by 80.
|
||||||
// TODO(Triang3l): Resolution scaling.
|
// Apply resolution scaling to tile dimensions.
|
||||||
uint32_t tile_width = xenos::kEdramTileWidthSamples;
|
uint32_t tile_width =
|
||||||
|
xenos::kEdramTileWidthSamples * draw_resolution_scale_x_;
|
||||||
spv::Id const_tile_half_width = builder_->makeUintConstant(tile_width >> 1);
|
spv::Id const_tile_half_width = builder_->makeUintConstant(tile_width >> 1);
|
||||||
uint32_t tile_height = xenos::kEdramTileHeightSamples;
|
uint32_t tile_height =
|
||||||
|
xenos::kEdramTileHeightSamples * draw_resolution_scale_y_;
|
||||||
spv::Id const_tile_height = builder_->makeUintConstant(tile_height);
|
spv::Id const_tile_height = builder_->makeUintConstant(tile_height);
|
||||||
spv::Id tile_half_index[2], tile_half_sample_coordinates[2];
|
spv::Id tile_half_index[2], tile_half_sample_coordinates[2];
|
||||||
for (uint32_t i = 0; i < 2; ++i) {
|
for (uint32_t i = 0; i < 2; ++i) {
|
||||||
|
|
@ -1565,8 +1568,9 @@ spv::Id SpirvShaderTranslator::FSI_AddSampleOffset(spv::Id sample_0_address,
|
||||||
return sample_0_address;
|
return sample_0_address;
|
||||||
}
|
}
|
||||||
spv::Id sample_offset;
|
spv::Id sample_offset;
|
||||||
// TODO(Triang3l): Resolution scaling.
|
// Apply resolution scaling to tile width.
|
||||||
uint32_t tile_width = xenos::kEdramTileWidthSamples;
|
uint32_t tile_width =
|
||||||
|
xenos::kEdramTileWidthSamples * draw_resolution_scale_x_;
|
||||||
if (sample_index == 1) {
|
if (sample_index == 1) {
|
||||||
sample_offset = builder_->makeIntConstant(tile_width);
|
sample_offset = builder_->makeIntConstant(tile_width);
|
||||||
} else {
|
} else {
|
||||||
|
|
|
||||||
|
|
@ -183,10 +183,15 @@ TextureCache::~TextureCache() {
|
||||||
|
|
||||||
bool TextureCache::GetConfigDrawResolutionScale(uint32_t& x_out,
|
bool TextureCache::GetConfigDrawResolutionScale(uint32_t& x_out,
|
||||||
uint32_t& y_out) {
|
uint32_t& y_out) {
|
||||||
uint32_t config_x =
|
// Clamp to valid range [1, max] to ensure safe conversion to uint32_t
|
||||||
uint32_t(std::max(INT32_C(1), cvars::draw_resolution_scale_x));
|
int32_t config_x_signed =
|
||||||
uint32_t config_y =
|
std::clamp(cvars::draw_resolution_scale_x, INT32_C(1),
|
||||||
uint32_t(std::max(INT32_C(1), cvars::draw_resolution_scale_y));
|
static_cast<int32_t>(kMaxDrawResolutionScaleAlongAxis));
|
||||||
|
int32_t config_y_signed =
|
||||||
|
std::clamp(cvars::draw_resolution_scale_y, INT32_C(1),
|
||||||
|
static_cast<int32_t>(kMaxDrawResolutionScaleAlongAxis));
|
||||||
|
uint32_t config_x = static_cast<uint32_t>(config_x_signed);
|
||||||
|
uint32_t config_y = static_cast<uint32_t>(config_y_signed);
|
||||||
uint32_t clamped_x = std::min(kMaxDrawResolutionScaleAlongAxis, config_x);
|
uint32_t clamped_x = std::min(kMaxDrawResolutionScaleAlongAxis, config_x);
|
||||||
uint32_t clamped_y = std::min(kMaxDrawResolutionScaleAlongAxis, config_y);
|
uint32_t clamped_y = std::min(kMaxDrawResolutionScaleAlongAxis, config_y);
|
||||||
x_out = clamped_x;
|
x_out = clamped_x;
|
||||||
|
|
|
||||||
|
|
@ -273,10 +273,13 @@ bool VulkanCommandProcessor::SetupContext() {
|
||||||
<< shared_memory_binding_count_log2;
|
<< shared_memory_binding_count_log2;
|
||||||
|
|
||||||
// Requires the transient descriptor set layouts.
|
// Requires the transient descriptor set layouts.
|
||||||
// TODO(Triang3l): Get the actual draw resolution scale when the texture cache
|
// Get draw resolution scale using the same method as D3D12
|
||||||
// supports resolution scaling.
|
uint32_t draw_resolution_scale_x, draw_resolution_scale_y;
|
||||||
|
TextureCache::GetConfigDrawResolutionScale(draw_resolution_scale_x,
|
||||||
|
draw_resolution_scale_y);
|
||||||
render_target_cache_ = std::make_unique<VulkanRenderTargetCache>(
|
render_target_cache_ = std::make_unique<VulkanRenderTargetCache>(
|
||||||
*register_file_, *memory_, trace_writer_, 1, 1, *this);
|
*register_file_, *memory_, trace_writer_, draw_resolution_scale_x,
|
||||||
|
draw_resolution_scale_y, *this);
|
||||||
if (!render_target_cache_->Initialize(shared_memory_binding_count)) {
|
if (!render_target_cache_->Initialize(shared_memory_binding_count)) {
|
||||||
XELOGE("Failed to initialize the render target cache");
|
XELOGE("Failed to initialize the render target cache");
|
||||||
return false;
|
return false;
|
||||||
|
|
@ -339,10 +342,10 @@ bool VulkanCommandProcessor::SetupContext() {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Requires the transient descriptor set layouts.
|
// Requires the transient descriptor set layouts.
|
||||||
// TODO(Triang3l): Actual draw resolution scale.
|
// Use the same draw resolution scale as render target cache
|
||||||
texture_cache_ =
|
texture_cache_ = VulkanTextureCache::Create(
|
||||||
VulkanTextureCache::Create(*register_file_, *shared_memory_, 1, 1, *this,
|
*register_file_, *shared_memory_, draw_resolution_scale_x,
|
||||||
guest_shader_pipeline_stages_);
|
draw_resolution_scale_y, *this, guest_shader_pipeline_stages_);
|
||||||
if (!texture_cache_) {
|
if (!texture_cache_) {
|
||||||
XELOGE("Failed to initialize the texture cache");
|
XELOGE("Failed to initialize the texture cache");
|
||||||
return false;
|
return false;
|
||||||
|
|
@ -2442,15 +2445,19 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type,
|
||||||
// life. Or even disregard the viewport bounds range in the fragment shader
|
// life. Or even disregard the viewport bounds range in the fragment shader
|
||||||
// interlocks case completely - apply the viewport and the scissor offset
|
// interlocks case completely - apply the viewport and the scissor offset
|
||||||
// directly to pixel address and to things like ps_param_gen.
|
// directly to pixel address and to things like ps_param_gen.
|
||||||
|
uint32_t draw_resolution_scale_x = texture_cache_->draw_resolution_scale_x();
|
||||||
|
uint32_t draw_resolution_scale_y = texture_cache_->draw_resolution_scale_y();
|
||||||
draw_util::GetHostViewportInfo(
|
draw_util::GetHostViewportInfo(
|
||||||
regs, 1, 1, false, device_properties.maxViewportDimensions[0],
|
regs, draw_resolution_scale_x, draw_resolution_scale_y, false,
|
||||||
|
device_properties.maxViewportDimensions[0],
|
||||||
device_properties.maxViewportDimensions[1], true,
|
device_properties.maxViewportDimensions[1], true,
|
||||||
normalized_depth_control, false, host_render_targets_used,
|
normalized_depth_control, false, host_render_targets_used,
|
||||||
pixel_shader && pixel_shader->writes_depth(), viewport_info);
|
pixel_shader && pixel_shader->writes_depth(), viewport_info);
|
||||||
|
|
||||||
// Update dynamic graphics pipeline state.
|
// Update dynamic graphics pipeline state.
|
||||||
UpdateDynamicState(viewport_info, primitive_polygonal,
|
UpdateDynamicState(viewport_info, primitive_polygonal,
|
||||||
normalized_depth_control);
|
normalized_depth_control, draw_resolution_scale_x,
|
||||||
|
draw_resolution_scale_y);
|
||||||
|
|
||||||
auto vgt_draw_initiator = regs.Get<reg::VGT_DRAW_INITIATOR>();
|
auto vgt_draw_initiator = regs.Get<reg::VGT_DRAW_INITIATOR>();
|
||||||
|
|
||||||
|
|
@ -3243,7 +3250,8 @@ void VulkanCommandProcessor::DestroyScratchBuffer() {
|
||||||
|
|
||||||
void VulkanCommandProcessor::UpdateDynamicState(
|
void VulkanCommandProcessor::UpdateDynamicState(
|
||||||
const draw_util::ViewportInfo& viewport_info, bool primitive_polygonal,
|
const draw_util::ViewportInfo& viewport_info, bool primitive_polygonal,
|
||||||
reg::RB_DEPTHCONTROL normalized_depth_control) {
|
reg::RB_DEPTHCONTROL normalized_depth_control,
|
||||||
|
uint32_t draw_resolution_scale_x, uint32_t draw_resolution_scale_y) {
|
||||||
#if XE_GPU_FINE_GRAINED_DRAW_SCOPES
|
#if XE_GPU_FINE_GRAINED_DRAW_SCOPES
|
||||||
SCOPE_profile_cpu_f("gpu");
|
SCOPE_profile_cpu_f("gpu");
|
||||||
#endif // XE_GPU_FINE_GRAINED_DRAW_SCOPES
|
#endif // XE_GPU_FINE_GRAINED_DRAW_SCOPES
|
||||||
|
|
@ -3279,6 +3287,11 @@ void VulkanCommandProcessor::UpdateDynamicState(
|
||||||
// Scissor.
|
// Scissor.
|
||||||
draw_util::Scissor scissor;
|
draw_util::Scissor scissor;
|
||||||
draw_util::GetScissor(regs, scissor);
|
draw_util::GetScissor(regs, scissor);
|
||||||
|
// Scale the scissor to match the render target resolution scale
|
||||||
|
scissor.offset[0] *= draw_resolution_scale_x;
|
||||||
|
scissor.offset[1] *= draw_resolution_scale_y;
|
||||||
|
scissor.extent[0] *= draw_resolution_scale_x;
|
||||||
|
scissor.extent[1] *= draw_resolution_scale_y;
|
||||||
VkRect2D scissor_rect;
|
VkRect2D scissor_rect;
|
||||||
scissor_rect.offset.x = int32_t(scissor.offset[0]);
|
scissor_rect.offset.x = int32_t(scissor.offset[0]);
|
||||||
scissor_rect.offset.y = int32_t(scissor.offset[1]);
|
scissor_rect.offset.y = int32_t(scissor.offset[1]);
|
||||||
|
|
|
||||||
|
|
@ -426,7 +426,9 @@ class VulkanCommandProcessor : public CommandProcessor {
|
||||||
|
|
||||||
void UpdateDynamicState(const draw_util::ViewportInfo& viewport_info,
|
void UpdateDynamicState(const draw_util::ViewportInfo& viewport_info,
|
||||||
bool primitive_polygonal,
|
bool primitive_polygonal,
|
||||||
reg::RB_DEPTHCONTROL normalized_depth_control);
|
reg::RB_DEPTHCONTROL normalized_depth_control,
|
||||||
|
uint32_t draw_resolution_scale_x,
|
||||||
|
uint32_t draw_resolution_scale_y);
|
||||||
void UpdateSystemConstantValues(
|
void UpdateSystemConstantValues(
|
||||||
bool primitive_polygonal,
|
bool primitive_polygonal,
|
||||||
const PrimitiveProcessor::ProcessingResult& primitive_processing_result,
|
const PrimitiveProcessor::ProcessingResult& primitive_processing_result,
|
||||||
|
|
|
||||||
|
|
@ -62,7 +62,9 @@ bool VulkanPipelineCache::Initialize() {
|
||||||
SpirvShaderTranslator::Features(vulkan_device),
|
SpirvShaderTranslator::Features(vulkan_device),
|
||||||
render_target_cache_.msaa_2x_attachments_supported(),
|
render_target_cache_.msaa_2x_attachments_supported(),
|
||||||
render_target_cache_.msaa_2x_no_attachments_supported(),
|
render_target_cache_.msaa_2x_no_attachments_supported(),
|
||||||
edram_fragment_shader_interlock);
|
edram_fragment_shader_interlock,
|
||||||
|
render_target_cache_.draw_resolution_scale_x(),
|
||||||
|
render_target_cache_.draw_resolution_scale_y());
|
||||||
|
|
||||||
if (edram_fragment_shader_interlock) {
|
if (edram_fragment_shader_interlock) {
|
||||||
std::vector<uint8_t> depth_only_fragment_shader_code =
|
std::vector<uint8_t> depth_only_fragment_shader_code =
|
||||||
|
|
|
||||||
|
|
@ -1067,6 +1067,13 @@ bool VulkanRenderTargetCache::Resolve(const Memory& memory,
|
||||||
uint32_t dump_pitch;
|
uint32_t dump_pitch;
|
||||||
resolve_info.GetCopyEdramTileSpan(dump_base, dump_row_length_used,
|
resolve_info.GetCopyEdramTileSpan(dump_base, dump_row_length_used,
|
||||||
dump_rows, dump_pitch);
|
dump_rows, dump_pitch);
|
||||||
|
// Scale tile parameters for resolution scaling to match resolve shader
|
||||||
|
// expectations
|
||||||
|
if (IsDrawResolutionScaled()) {
|
||||||
|
dump_row_length_used *= draw_resolution_scale_x();
|
||||||
|
dump_rows *= draw_resolution_scale_y();
|
||||||
|
dump_pitch *= draw_resolution_scale_x();
|
||||||
|
}
|
||||||
DumpRenderTargets(dump_base, dump_row_length_used, dump_rows, dump_pitch);
|
DumpRenderTargets(dump_base, dump_row_length_used, dump_rows, dump_pitch);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -1101,15 +1108,95 @@ bool VulkanRenderTargetCache::Resolve(const Memory& memory,
|
||||||
kStorageBufferCompute);
|
kStorageBufferCompute);
|
||||||
if (descriptor_set_dest != VK_NULL_HANDLE) {
|
if (descriptor_set_dest != VK_NULL_HANDLE) {
|
||||||
// Write the destination descriptor.
|
// Write the destination descriptor.
|
||||||
// TODO(Triang3l): Scaled resolve buffer binding.
|
|
||||||
VkDescriptorBufferInfo write_descriptor_set_dest_buffer_info;
|
VkDescriptorBufferInfo write_descriptor_set_dest_buffer_info;
|
||||||
write_descriptor_set_dest_buffer_info.buffer = shared_memory.buffer();
|
|
||||||
|
bool scaled_buffer_ready = false;
|
||||||
|
if (draw_resolution_scaled) {
|
||||||
|
// For scaled resolve, ensure the scaled buffer exists and bind to
|
||||||
|
// it
|
||||||
|
uint32_t dest_address = resolve_info.copy_dest_base;
|
||||||
|
uint32_t dest_length = resolve_info.copy_dest_extent_start -
|
||||||
|
resolve_info.copy_dest_base +
|
||||||
|
resolve_info.copy_dest_extent_length;
|
||||||
|
|
||||||
|
// Ensure scaled resolve memory is committed
|
||||||
|
scaled_buffer_ready = true;
|
||||||
|
if (!texture_cache.EnsureScaledResolveMemoryCommittedPublic(
|
||||||
|
dest_address, dest_length)) {
|
||||||
|
XELOGE(
|
||||||
|
"Failed to commit scaled resolve memory for resolve dest at "
|
||||||
|
"0x{:08X}",
|
||||||
|
dest_address);
|
||||||
|
scaled_buffer_ready = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Make the range current to get the buffer
|
||||||
|
if (scaled_buffer_ready &&
|
||||||
|
!texture_cache.MakeScaledResolveRangeCurrent(dest_address,
|
||||||
|
dest_length)) {
|
||||||
|
XELOGE(
|
||||||
|
"Failed to make scaled resolve range current for resolve "
|
||||||
|
"dest at 0x{:08X}",
|
||||||
|
dest_address);
|
||||||
|
scaled_buffer_ready = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get the current scaled buffer
|
||||||
|
VkBuffer scaled_buffer = VK_NULL_HANDLE;
|
||||||
|
if (scaled_buffer_ready) {
|
||||||
|
scaled_buffer = texture_cache.GetCurrentScaledResolveBuffer();
|
||||||
|
if (scaled_buffer == VK_NULL_HANDLE) {
|
||||||
|
XELOGE(
|
||||||
|
"No current scaled resolve buffer for resolve dest at "
|
||||||
|
"0x{:08X}",
|
||||||
|
dest_address);
|
||||||
|
scaled_buffer_ready = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (scaled_buffer_ready) {
|
||||||
|
// Calculate offset within the scaled buffer
|
||||||
|
uint32_t draw_resolution_scale_area =
|
||||||
|
draw_resolution_scale_x() * draw_resolution_scale_y();
|
||||||
|
uint64_t scaled_offset =
|
||||||
|
uint64_t(dest_address) * draw_resolution_scale_area;
|
||||||
|
|
||||||
|
// Get the buffer's base offset to calculate relative offset
|
||||||
|
uint64_t buffer_relative_offset = 0;
|
||||||
|
size_t buffer_index =
|
||||||
|
texture_cache.GetScaledResolveCurrentBufferIndex();
|
||||||
|
auto* buffer_info =
|
||||||
|
texture_cache.GetScaledResolveBufferInfo(buffer_index);
|
||||||
|
if (buffer_info) {
|
||||||
|
buffer_relative_offset =
|
||||||
|
scaled_offset - buffer_info->range_start_scaled;
|
||||||
|
}
|
||||||
|
|
||||||
|
write_descriptor_set_dest_buffer_info.buffer = scaled_buffer;
|
||||||
|
write_descriptor_set_dest_buffer_info.offset =
|
||||||
|
buffer_relative_offset;
|
||||||
|
write_descriptor_set_dest_buffer_info.range =
|
||||||
|
dest_length * draw_resolution_scale_area;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!scaled_buffer_ready) {
|
||||||
|
// Regular unscaled resolve - write to shared memory
|
||||||
|
if (draw_resolution_scaled) {
|
||||||
|
XELOGW(
|
||||||
|
"Falling back to unscaled resolve at 0x{:08X} - scaled "
|
||||||
|
"buffer not available",
|
||||||
|
resolve_info.copy_dest_base);
|
||||||
|
}
|
||||||
|
write_descriptor_set_dest_buffer_info.buffer =
|
||||||
|
shared_memory.buffer();
|
||||||
write_descriptor_set_dest_buffer_info.offset =
|
write_descriptor_set_dest_buffer_info.offset =
|
||||||
resolve_info.copy_dest_base;
|
resolve_info.copy_dest_base;
|
||||||
write_descriptor_set_dest_buffer_info.range =
|
write_descriptor_set_dest_buffer_info.range =
|
||||||
resolve_info.copy_dest_extent_start -
|
resolve_info.copy_dest_extent_start -
|
||||||
resolve_info.copy_dest_base +
|
resolve_info.copy_dest_base +
|
||||||
resolve_info.copy_dest_extent_length;
|
resolve_info.copy_dest_extent_length;
|
||||||
|
}
|
||||||
VkWriteDescriptorSet write_descriptor_set_dest;
|
VkWriteDescriptorSet write_descriptor_set_dest;
|
||||||
write_descriptor_set_dest.sType =
|
write_descriptor_set_dest.sType =
|
||||||
VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
|
VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
|
||||||
|
|
@ -1128,11 +1215,37 @@ bool VulkanRenderTargetCache::Resolve(const Memory& memory,
|
||||||
nullptr);
|
nullptr);
|
||||||
|
|
||||||
// Submit the resolve.
|
// Submit the resolve.
|
||||||
// TODO(Triang3l): Transition the scaled resolve buffer.
|
if (!scaled_buffer_ready) {
|
||||||
|
// Regular unscaled - transition shared memory for write
|
||||||
shared_memory.Use(VulkanSharedMemory::Usage::kComputeWrite,
|
shared_memory.Use(VulkanSharedMemory::Usage::kComputeWrite,
|
||||||
std::pair<uint32_t, uint32_t>(
|
std::pair<uint32_t, uint32_t>(
|
||||||
resolve_info.copy_dest_extent_start,
|
resolve_info.copy_dest_extent_start,
|
||||||
resolve_info.copy_dest_extent_length));
|
resolve_info.copy_dest_extent_length));
|
||||||
|
} else {
|
||||||
|
// Scaled - add barrier for the scaled resolve buffer
|
||||||
|
// The buffer transitions from compute shader read (texture loading)
|
||||||
|
// to compute shader write
|
||||||
|
VkBuffer scaled_buffer =
|
||||||
|
texture_cache.GetCurrentScaledResolveBuffer();
|
||||||
|
if (scaled_buffer != VK_NULL_HANDLE) {
|
||||||
|
VkBufferMemoryBarrier buffer_barrier = {};
|
||||||
|
buffer_barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
|
||||||
|
// More specific: previous compute shader reads to compute shader
|
||||||
|
// write
|
||||||
|
buffer_barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT;
|
||||||
|
buffer_barrier.dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
|
||||||
|
buffer_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
|
||||||
|
buffer_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
|
||||||
|
buffer_barrier.buffer = scaled_buffer;
|
||||||
|
buffer_barrier.offset = 0;
|
||||||
|
buffer_barrier.size = VK_WHOLE_SIZE;
|
||||||
|
|
||||||
|
command_buffer.CmdVkPipelineBarrier(
|
||||||
|
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, // From compute shader
|
||||||
|
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, // To compute shader
|
||||||
|
0, 0, nullptr, 1, &buffer_barrier, 0, nullptr);
|
||||||
|
}
|
||||||
|
}
|
||||||
UseEdramBuffer(EdramBufferUsage::kComputeRead);
|
UseEdramBuffer(EdramBufferUsage::kComputeRead);
|
||||||
command_processor_.BindExternalComputePipeline(
|
command_processor_.BindExternalComputePipeline(
|
||||||
resolve_copy_pipelines_[size_t(copy_shader)]);
|
resolve_copy_pipelines_[size_t(copy_shader)]);
|
||||||
|
|
@ -1163,6 +1276,28 @@ bool VulkanRenderTargetCache::Resolve(const Memory& memory,
|
||||||
command_buffer.CmdVkDispatch(copy_group_count_x, copy_group_count_y,
|
command_buffer.CmdVkDispatch(copy_group_count_x, copy_group_count_y,
|
||||||
1);
|
1);
|
||||||
|
|
||||||
|
// Add barrier after writing to scaled resolve buffer
|
||||||
|
if (scaled_buffer_ready) {
|
||||||
|
VkBuffer scaled_buffer =
|
||||||
|
texture_cache.GetCurrentScaledResolveBuffer();
|
||||||
|
if (scaled_buffer != VK_NULL_HANDLE) {
|
||||||
|
VkBufferMemoryBarrier buffer_barrier = {};
|
||||||
|
buffer_barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
|
||||||
|
buffer_barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
|
||||||
|
buffer_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
|
||||||
|
buffer_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
|
||||||
|
buffer_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
|
||||||
|
buffer_barrier.buffer = scaled_buffer;
|
||||||
|
buffer_barrier.offset = 0;
|
||||||
|
buffer_barrier.size = VK_WHOLE_SIZE;
|
||||||
|
|
||||||
|
command_buffer.CmdVkPipelineBarrier(
|
||||||
|
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
|
||||||
|
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, nullptr, 1,
|
||||||
|
&buffer_barrier, 0, nullptr);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Invalidate textures and mark the range as scaled if needed.
|
// Invalidate textures and mark the range as scaled if needed.
|
||||||
texture_cache.MarkRangeAsResolved(
|
texture_cache.MarkRangeAsResolved(
|
||||||
resolve_info.copy_dest_extent_start,
|
resolve_info.copy_dest_extent_start,
|
||||||
|
|
|
||||||
|
|
@ -477,6 +477,16 @@ VulkanTextureCache::~VulkanTextureCache() {
|
||||||
// textures before destroying VMA.
|
// textures before destroying VMA.
|
||||||
DestroyAllTextures(true);
|
DestroyAllTextures(true);
|
||||||
|
|
||||||
|
// Clean up scaled resolve buffers before destroying VMA
|
||||||
|
// The command processor should ensure all GPU operations are complete
|
||||||
|
// before the texture cache is destroyed
|
||||||
|
for (ScaledResolveBuffer& buffer : scaled_resolve_buffers_) {
|
||||||
|
if (buffer.buffer != VK_NULL_HANDLE) {
|
||||||
|
vmaDestroyBuffer(vma_allocator_, buffer.buffer, buffer.allocation);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
scaled_resolve_buffers_.clear();
|
||||||
|
|
||||||
if (vma_allocator_ != VK_NULL_HANDLE) {
|
if (vma_allocator_ != VK_NULL_HANDLE) {
|
||||||
vmaDestroyAllocator(vma_allocator_);
|
vmaDestroyAllocator(vma_allocator_);
|
||||||
}
|
}
|
||||||
|
|
@ -896,6 +906,7 @@ VkImageView VulkanTextureCache::RequestSwapTexture(
|
||||||
return VK_NULL_HANDLE;
|
return VK_NULL_HANDLE;
|
||||||
}
|
}
|
||||||
if (!LoadTextureData(*texture)) {
|
if (!LoadTextureData(*texture)) {
|
||||||
|
XELOGE("Failed to load texture data for swap texture");
|
||||||
return VK_NULL_HANDLE;
|
return VK_NULL_HANDLE;
|
||||||
}
|
}
|
||||||
texture->MarkAsUsed();
|
texture->MarkAsUsed();
|
||||||
|
|
@ -925,6 +936,13 @@ VkImageView VulkanTextureCache::RequestSwapTexture(
|
||||||
return texture_view;
|
return texture_view;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Tells whether textures with the format of `key` can be loaded from
// resolution-scaled resolve data. The only criterion checked here is that the
// host format pair for the key has a defined unsigned host format.
bool VulkanTextureCache::IsScaledResolveSupportedForFormat(
    TextureKey key) const {
  // VK_FORMAT_UNDEFINED as the unsigned host format is treated as "this guest
  // format can't be handled".
  return GetHostFormatPair(key).format_unsigned.format != VK_FORMAT_UNDEFINED;
}
|
||||||
|
|
||||||
bool VulkanTextureCache::IsSignedVersionSeparateForFormat(
|
bool VulkanTextureCache::IsSignedVersionSeparateForFormat(
|
||||||
TextureKey key) const {
|
TextureKey key) const {
|
||||||
const HostFormatPair& host_format_pair = GetHostFormatPair(key);
|
const HostFormatPair& host_format_pair = GetHostFormatPair(key);
|
||||||
|
|
@ -1262,7 +1280,6 @@ bool VulkanTextureCache::LoadTextureDataFromResidentMemoryImpl(Texture& texture,
|
||||||
write_descriptor_set_dest.pTexelBufferView = nullptr;
|
write_descriptor_set_dest.pTexelBufferView = nullptr;
|
||||||
}
|
}
|
||||||
// TODO(Triang3l): Use a single 512 MB shared memory binding if possible.
|
// TODO(Triang3l): Use a single 512 MB shared memory binding if possible.
|
||||||
// TODO(Triang3l): Scaled resolve buffer bindings.
|
|
||||||
// Aligning because if the data for a vector in a storage buffer is provided
|
// Aligning because if the data for a vector in a storage buffer is provided
|
||||||
// partially, the value read may still be (0, 0, 0, 0), and small (especially
|
// partially, the value read may still be (0, 0, 0, 0), and small (especially
|
||||||
// linear) textures won't be loaded correctly.
|
// linear) textures won't be loaded correctly.
|
||||||
|
|
@ -1280,12 +1297,69 @@ bool VulkanTextureCache::LoadTextureDataFromResidentMemoryImpl(Texture& texture,
|
||||||
if (!descriptor_set_source_base) {
|
if (!descriptor_set_source_base) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
if (texture_key.scaled_resolve) {
|
||||||
|
// For scaled textures, read from scaled resolve buffers
|
||||||
|
uint32_t guest_address = texture_key.base_page << 12;
|
||||||
|
uint32_t guest_size = vulkan_texture.GetGuestBaseSize();
|
||||||
|
|
||||||
|
// Ensure the scaled buffer exists
|
||||||
|
if (EnsureScaledResolveMemoryCommitted(guest_address, guest_size)) {
|
||||||
|
// Make the range current
|
||||||
|
if (MakeScaledResolveRangeCurrent(guest_address, guest_size)) {
|
||||||
|
VkBuffer scaled_buffer = GetCurrentScaledResolveBuffer();
|
||||||
|
if (scaled_buffer != VK_NULL_HANDLE) {
|
||||||
|
// Calculate offset within the scaled buffer
|
||||||
|
uint32_t draw_resolution_scale_area =
|
||||||
|
draw_resolution_scale_x() * draw_resolution_scale_y();
|
||||||
|
uint64_t scaled_offset =
|
||||||
|
uint64_t(guest_address) * draw_resolution_scale_area;
|
||||||
|
|
||||||
|
uint64_t buffer_relative_offset = 0;
|
||||||
|
if (scaled_resolve_current_buffer_index_ <
|
||||||
|
scaled_resolve_buffers_.size()) {
|
||||||
|
const ScaledResolveBuffer& current_buffer =
|
||||||
|
scaled_resolve_buffers_[scaled_resolve_current_buffer_index_];
|
||||||
|
buffer_relative_offset =
|
||||||
|
scaled_offset - current_buffer.range_start_scaled;
|
||||||
|
}
|
||||||
|
|
||||||
|
write_descriptor_set_source_base_buffer_info.buffer = scaled_buffer;
|
||||||
|
write_descriptor_set_source_base_buffer_info.offset =
|
||||||
|
buffer_relative_offset;
|
||||||
|
write_descriptor_set_source_base_buffer_info.range =
|
||||||
|
xe::align(guest_size * draw_resolution_scale_area,
|
||||||
|
source_length_alignment);
|
||||||
|
|
||||||
|
} else {
|
||||||
|
XELOGE(
|
||||||
|
"Scaled resolve texture load: Failed to get current scaled "
|
||||||
|
"buffer for texture at 0x{:08X}",
|
||||||
|
guest_address);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
XELOGE(
|
||||||
|
"Scaled resolve texture load: Failed to make range current for "
|
||||||
|
"texture at 0x{:08X}",
|
||||||
|
guest_address);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
XELOGE(
|
||||||
|
"Scaled resolve texture load: Failed to ensure scaled memory for "
|
||||||
|
"texture at 0x{:08X}",
|
||||||
|
guest_address);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Regular unscaled texture - use shared memory
|
||||||
write_descriptor_set_source_base_buffer_info.buffer =
|
write_descriptor_set_source_base_buffer_info.buffer =
|
||||||
vulkan_shared_memory.buffer();
|
vulkan_shared_memory.buffer();
|
||||||
write_descriptor_set_source_base_buffer_info.offset = texture_key.base_page
|
write_descriptor_set_source_base_buffer_info.offset =
|
||||||
<< 12;
|
texture_key.base_page << 12;
|
||||||
write_descriptor_set_source_base_buffer_info.range =
|
write_descriptor_set_source_base_buffer_info.range =
|
||||||
xe::align(vulkan_texture.GetGuestBaseSize(), source_length_alignment);
|
xe::align(vulkan_texture.GetGuestBaseSize(), source_length_alignment);
|
||||||
|
}
|
||||||
VkWriteDescriptorSet& write_descriptor_set_source_base =
|
VkWriteDescriptorSet& write_descriptor_set_source_base =
|
||||||
write_descriptor_sets[write_descriptor_set_count++];
|
write_descriptor_sets[write_descriptor_set_count++];
|
||||||
write_descriptor_set_source_base.sType =
|
write_descriptor_set_source_base.sType =
|
||||||
|
|
@ -1310,6 +1384,10 @@ bool VulkanTextureCache::LoadTextureDataFromResidentMemoryImpl(Texture& texture,
|
||||||
if (!descriptor_set_source_mips) {
|
if (!descriptor_set_source_mips) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
// TODO: Implement scaled mips support similar to D3D12.
|
||||||
|
// Currently mips are always loaded from unscaled shared memory even when
|
||||||
|
// the base texture is scaled. D3D12 properly handles scaled mips in
|
||||||
|
// D3D12TextureCache::LoadTextureDataFromResidentMemoryImpl.
|
||||||
write_descriptor_set_source_mips_buffer_info.buffer =
|
write_descriptor_set_source_mips_buffer_info.buffer =
|
||||||
vulkan_shared_memory.buffer();
|
vulkan_shared_memory.buffer();
|
||||||
write_descriptor_set_source_mips_buffer_info.offset = texture_key.mip_page
|
write_descriptor_set_source_mips_buffer_info.offset = texture_key.mip_page
|
||||||
|
|
@ -1700,10 +1778,7 @@ VulkanTextureCache::VulkanTextureCache(
|
||||||
: TextureCache(register_file, shared_memory, draw_resolution_scale_x,
|
: TextureCache(register_file, shared_memory, draw_resolution_scale_x,
|
||||||
draw_resolution_scale_y),
|
draw_resolution_scale_y),
|
||||||
command_processor_(command_processor),
|
command_processor_(command_processor),
|
||||||
guest_shader_pipeline_stages_(guest_shader_pipeline_stages) {
|
guest_shader_pipeline_stages_(guest_shader_pipeline_stages) {}
|
||||||
// TODO(Triang3l): Support draw resolution scaling.
|
|
||||||
assert_true(draw_resolution_scale_x == 1 && draw_resolution_scale_y == 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
bool VulkanTextureCache::Initialize() {
|
bool VulkanTextureCache::Initialize() {
|
||||||
const ui::vulkan::VulkanDevice* const vulkan_device =
|
const ui::vulkan::VulkanDevice* const vulkan_device =
|
||||||
|
|
@ -2656,6 +2731,160 @@ xenos::ClampMode VulkanTextureCache::NormalizeClampMode(
|
||||||
return clamp_mode;
|
return clamp_mode;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool VulkanTextureCache::EnsureScaledResolveMemoryCommitted(
|
||||||
|
uint32_t start_unscaled, uint32_t length_unscaled,
|
||||||
|
uint32_t length_scaled_alignment_log2) {
|
||||||
|
if (!IsDrawResolutionScaled()) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (length_unscaled == 0) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (start_unscaled > SharedMemory::kBufferSize ||
|
||||||
|
(SharedMemory::kBufferSize - start_unscaled) < length_unscaled) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
uint32_t draw_resolution_scale_area =
|
||||||
|
draw_resolution_scale_x() * draw_resolution_scale_y();
|
||||||
|
uint64_t start_scaled = uint64_t(start_unscaled) * draw_resolution_scale_area;
|
||||||
|
uint64_t length_scaled_alignment_bits =
|
||||||
|
(UINT64_C(1) << length_scaled_alignment_log2) - 1;
|
||||||
|
uint64_t length_scaled =
|
||||||
|
(uint64_t(length_unscaled) * draw_resolution_scale_area +
|
||||||
|
length_scaled_alignment_bits) &
|
||||||
|
~length_scaled_alignment_bits;
|
||||||
|
|
||||||
|
// Check if any existing buffer covers this range
|
||||||
|
|
||||||
|
bool range_covered = false;
|
||||||
|
for (const ScaledResolveBuffer& buffer : scaled_resolve_buffers_) {
|
||||||
|
if (buffer.range_start_scaled <= start_scaled &&
|
||||||
|
(buffer.range_start_scaled + buffer.range_length_scaled) >=
|
||||||
|
(start_scaled + length_scaled)) {
|
||||||
|
// This buffer covers the requested range
|
||||||
|
scaled_resolve_current_range_start_scaled_ = buffer.range_start_scaled;
|
||||||
|
scaled_resolve_current_range_length_scaled_ = buffer.range_length_scaled;
|
||||||
|
range_covered = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!range_covered) {
|
||||||
|
// Need to create a new buffer or extend an existing one
|
||||||
|
// For simplicity and to avoid fragmentation, we'll use a fixed-size buffer
|
||||||
|
// approach similar to D3D12 (but smaller - 256MB chunks instead of 2GB)
|
||||||
|
constexpr uint64_t kBufferSize = 256 * 1024 * 1024; // 256MB per buffer
|
||||||
|
|
||||||
|
// Round up the range to cover complete buffer chunks
|
||||||
|
uint64_t buffer_start = (start_scaled / kBufferSize) * kBufferSize;
|
||||||
|
uint64_t buffer_end =
|
||||||
|
((start_scaled + length_scaled + kBufferSize - 1) / kBufferSize) *
|
||||||
|
kBufferSize;
|
||||||
|
uint64_t buffer_size = buffer_end - buffer_start;
|
||||||
|
|
||||||
|
// Check again if this expanded range is covered
|
||||||
|
bool expanded_range_covered = false;
|
||||||
|
for (const ScaledResolveBuffer& buffer : scaled_resolve_buffers_) {
|
||||||
|
if (buffer.range_start_scaled <= buffer_start &&
|
||||||
|
(buffer.range_start_scaled + buffer.range_length_scaled) >=
|
||||||
|
buffer_end) {
|
||||||
|
scaled_resolve_current_range_start_scaled_ = buffer.range_start_scaled;
|
||||||
|
scaled_resolve_current_range_length_scaled_ =
|
||||||
|
buffer.range_length_scaled;
|
||||||
|
expanded_range_covered = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!expanded_range_covered) {
|
||||||
|
// Limit the number of buffers to prevent unbounded growth
|
||||||
|
constexpr size_t kMaxBuffers = 32; // Maximum 8GB total (32 * 256MB)
|
||||||
|
if (scaled_resolve_buffers_.size() >= kMaxBuffers) {
|
||||||
|
// Reuse the least recently used buffer
|
||||||
|
// For now, just reuse the first buffer (simple LRU would be better)
|
||||||
|
ScaledResolveBuffer& reused_buffer = scaled_resolve_buffers_[0];
|
||||||
|
reused_buffer.range_start_scaled = buffer_start;
|
||||||
|
reused_buffer.range_length_scaled = buffer_size;
|
||||||
|
scaled_resolve_current_range_start_scaled_ = buffer_start;
|
||||||
|
scaled_resolve_current_range_length_scaled_ = buffer_size;
|
||||||
|
} else {
|
||||||
|
ScaledResolveBuffer new_buffer;
|
||||||
|
new_buffer.size = buffer_size;
|
||||||
|
|
||||||
|
VkBufferCreateInfo buffer_create_info = {};
|
||||||
|
buffer_create_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
|
||||||
|
buffer_create_info.size = new_buffer.size;
|
||||||
|
buffer_create_info.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
|
||||||
|
buffer_create_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
|
||||||
|
|
||||||
|
VmaAllocationCreateInfo allocation_create_info = {};
|
||||||
|
allocation_create_info.usage = VMA_MEMORY_USAGE_GPU_ONLY;
|
||||||
|
|
||||||
|
VkResult result = vmaCreateBuffer(
|
||||||
|
vma_allocator_, &buffer_create_info, &allocation_create_info,
|
||||||
|
&new_buffer.buffer, &new_buffer.allocation, nullptr);
|
||||||
|
|
||||||
|
if (result != VK_SUCCESS) {
|
||||||
|
XELOGE(
|
||||||
|
"VulkanTextureCache: Failed to create scaled resolve buffer: {}",
|
||||||
|
static_cast<int>(result));
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
new_buffer.range_start_scaled = buffer_start;
|
||||||
|
new_buffer.range_length_scaled = buffer_size;
|
||||||
|
|
||||||
|
scaled_resolve_buffers_.push_back(new_buffer);
|
||||||
|
scaled_resolve_current_range_start_scaled_ = buffer_start;
|
||||||
|
scaled_resolve_current_range_length_scaled_ = buffer_size;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool VulkanTextureCache::MakeScaledResolveRangeCurrent(
|
||||||
|
uint32_t start_unscaled, uint32_t length_unscaled,
|
||||||
|
uint32_t length_scaled_alignment_log2) {
|
||||||
|
if (!IsDrawResolutionScaled()) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// First ensure the memory is committed (creates buffers if needed)
|
||||||
|
if (!EnsureScaledResolveMemoryCommitted(start_unscaled, length_unscaled,
|
||||||
|
length_scaled_alignment_log2)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
uint32_t draw_resolution_scale_area =
|
||||||
|
draw_resolution_scale_x() * draw_resolution_scale_y();
|
||||||
|
uint64_t start_scaled = uint64_t(start_unscaled) * draw_resolution_scale_area;
|
||||||
|
|
||||||
|
// Find which buffer contains this range
|
||||||
|
for (size_t i = 0; i < scaled_resolve_buffers_.size(); ++i) {
|
||||||
|
const ScaledResolveBuffer& buffer = scaled_resolve_buffers_[i];
|
||||||
|
if (start_scaled >= buffer.range_start_scaled &&
|
||||||
|
start_scaled <
|
||||||
|
(buffer.range_start_scaled + buffer.range_length_scaled)) {
|
||||||
|
scaled_resolve_current_buffer_index_ = i;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns the VkBuffer of the scaled resolve buffer currently selected by
// scaled_resolve_current_buffer_index_, or VK_NULL_HANDLE if that index doesn't
// refer to an existing buffer.
VkBuffer VulkanTextureCache::GetCurrentScaledResolveBuffer() const {
  if (scaled_resolve_current_buffer_index_ < scaled_resolve_buffers_.size()) {
    return scaled_resolve_buffers_[scaled_resolve_current_buffer_index_].buffer;
  }
  // The index is out of range, so there is no current buffer.
  return VK_NULL_HANDLE;
}
|
||||||
|
|
||||||
} // namespace vulkan
|
} // namespace vulkan
|
||||||
} // namespace gpu
|
} // namespace gpu
|
||||||
} // namespace xe
|
} // namespace xe
|
||||||
|
|
|
||||||
|
|
@ -121,7 +121,42 @@ class VulkanTextureCache final : public TextureCache {
|
||||||
uint32_t& height_scaled_out,
|
uint32_t& height_scaled_out,
|
||||||
xenos::TextureFormat& format_out);
|
xenos::TextureFormat& format_out);
|
||||||
|
|
||||||
|
// Scaled resolve buffer management (for use by VulkanRenderTargetCache)
|
||||||
|
struct ScaledResolveBuffer {
|
||||||
|
VkBuffer buffer = VK_NULL_HANDLE;
|
||||||
|
VmaAllocation allocation = VK_NULL_HANDLE;
|
||||||
|
uint64_t size = 0;
|
||||||
|
uint64_t range_start_scaled = 0;
|
||||||
|
uint64_t range_length_scaled = 0;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Public scaled resolve buffer methods for use by VulkanRenderTargetCache
|
||||||
|
bool EnsureScaledResolveMemoryCommittedPublic(
|
||||||
|
uint32_t start_unscaled, uint32_t length_unscaled,
|
||||||
|
uint32_t length_scaled_alignment_log2 = 0) {
|
||||||
|
return EnsureScaledResolveMemoryCommitted(start_unscaled, length_unscaled,
|
||||||
|
length_scaled_alignment_log2);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool MakeScaledResolveRangeCurrent(uint32_t start_unscaled,
|
||||||
|
uint32_t length_unscaled,
|
||||||
|
uint32_t length_scaled_alignment_log2 = 0);
|
||||||
|
|
||||||
|
VkBuffer GetCurrentScaledResolveBuffer() const;
|
||||||
|
|
||||||
|
size_t GetScaledResolveCurrentBufferIndex() const {
|
||||||
|
return scaled_resolve_current_buffer_index_;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns a pointer to the scaled resolve buffer entry at `index`, or nullptr
// if the index is out of range. The pointer refers to an element of
// scaled_resolve_buffers_.
const ScaledResolveBuffer* GetScaledResolveBufferInfo(size_t index) const {
  return index < scaled_resolve_buffers_.size()
             ? &scaled_resolve_buffers_[index]
             : nullptr;
}
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
|
bool IsScaledResolveSupportedForFormat(TextureKey key) const override;
|
||||||
bool IsSignedVersionSeparateForFormat(TextureKey key) const override;
|
bool IsSignedVersionSeparateForFormat(TextureKey key) const override;
|
||||||
uint32_t GetHostFormatSwizzle(TextureKey key) const override;
|
uint32_t GetHostFormatSwizzle(TextureKey key) const override;
|
||||||
|
|
||||||
|
|
@ -135,6 +170,10 @@ class VulkanTextureCache final : public TextureCache {
|
||||||
bool LoadTextureDataFromResidentMemoryImpl(Texture& texture, bool load_base,
|
bool LoadTextureDataFromResidentMemoryImpl(Texture& texture, bool load_base,
|
||||||
bool load_mips) override;
|
bool load_mips) override;
|
||||||
|
|
||||||
|
bool EnsureScaledResolveMemoryCommitted(
|
||||||
|
uint32_t start_unscaled, uint32_t length_unscaled,
|
||||||
|
uint32_t length_scaled_alignment_log2 = 0) override;
|
||||||
|
|
||||||
void UpdateTextureBindingsImpl(uint32_t fetch_constant_mask) override;
|
void UpdateTextureBindingsImpl(uint32_t fetch_constant_mask) override;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
|
@ -352,6 +391,13 @@ class VulkanTextureCache final : public TextureCache {
|
||||||
samplers_;
|
samplers_;
|
||||||
std::pair<const SamplerParameters, Sampler>* sampler_used_first_ = nullptr;
|
std::pair<const SamplerParameters, Sampler>* sampler_used_first_ = nullptr;
|
||||||
std::pair<const SamplerParameters, Sampler>* sampler_used_last_ = nullptr;
|
std::pair<const SamplerParameters, Sampler>* sampler_used_last_ = nullptr;
|
||||||
|
|
||||||
|
// Scaled resolve buffer storage
|
||||||
|
std::vector<ScaledResolveBuffer> scaled_resolve_buffers_;
|
||||||
|
// Current scaled resolve range tracking
|
||||||
|
uint64_t scaled_resolve_current_range_start_scaled_ = 0;
|
||||||
|
uint64_t scaled_resolve_current_range_length_scaled_ = 0;
|
||||||
|
size_t scaled_resolve_current_buffer_index_ = SIZE_MAX;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace vulkan
|
} // namespace vulkan
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue