rsx: Speed up slice gathering for large datasets

- We can cut down, by an order of magnitude, the number of checks we do to construct objects
This commit is contained in:
kd-11 2025-11-06 11:19:29 +03:00 committed by kd-11
parent 1007aaee4c
commit 864bb9f0db

View file

@ -262,18 +262,24 @@ namespace rsx
u64 tag; // Timestamp u64 tag; // Timestamp
u32 list; // List source, 0 = fbo, 1 = local u32 list; // List source, 0 = fbo, 1 = local
u32 index; // Index in list u32 index; // Index in list
utils::address_range32 bounds;
}; };
std::vector<sort_helper> sort_list; const u32 available_slices = fbos.size() + local.size();
bool unordered_list = false;
rsx::simple_array<sort_helper> sort_list;
rsx::simple_array<utils::address_range32> sort_ranges;
sort_list.reserve(available_slices);
sort_ranges.reserve(available_slices);
if (!fbos.empty() && !local.empty())
{
// Generate sorting tree if both resources are available and overlapping // Generate sorting tree if both resources are available and overlapping
sort_list.reserve(fbos.size() + local.size());
for (u32 index = 0; index < fbos.size(); ++index) for (u32 index = 0; index < fbos.size(); ++index)
{ {
sort_list.push_back({ fbos[index].surface->last_use_tag, 0, index }); const auto range = fbos[index].surface->get_memory_range();
sort_ranges.push_back(range);
sort_list.push_back({ fbos[index].surface->last_use_tag, 0, index, range });
} }
for (u32 index = 0; index < local.size(); ++index) for (u32 index = 0; index < local.size(); ++index)
@ -281,13 +287,32 @@ namespace rsx
if (local[index]->get_context() != rsx::texture_upload_context::blit_engine_dst) if (local[index]->get_context() != rsx::texture_upload_context::blit_engine_dst)
continue; continue;
sort_list.push_back({ local[index]->last_write_tag, 1, index }); const auto range = local[index]->get_section_range();
sort_ranges.push_back(range);
sort_list.push_back({ local[index]->last_write_tag, 1, index, range });
} }
std::sort(sort_list.begin(), sort_list.end(), FN(x.tag < y.tag)); if (!fbos.empty() && !local.empty())
{
sort_list.sort(FN(x.tag < y.tag));
} }
auto add_rtt_resource = [&](auto& section, u16 slice) // Check if ordered
for (u32 i = 0; i < sort_list.size(); ++i)
{
if (i == 0)
{
continue;
}
if (sort_ranges[i].start < sort_ranges[i - 1].end)
{
unordered_list = true;
break;
}
}
auto add_rtt_resource = [&](auto& section, u16 slice) -> std::pair<bool, bool> // [ input fully consumed, output fully covered ]
{ {
const u32 slice_begin = (slice * attr.slice_h); const u32 slice_begin = (slice * attr.slice_h);
const u32 slice_end = (slice_begin + attr.height); const u32 slice_end = (slice_begin + attr.height);
@ -296,7 +321,7 @@ namespace rsx
if (section.dst_area.y >= slice_end || section_end <= slice_begin) if (section.dst_area.y >= slice_end || section_end <= slice_begin)
{ {
// Belongs to a different slice // Belongs to a different slice
return; return { section_end <= slice_begin, false };
} }
// How much of this slice to read? // How much of this slice to read?
@ -346,9 +371,11 @@ namespace rsx
.dst_w = dst_width, .dst_w = dst_width,
.dst_h = dst_height .dst_h = dst_height
}); });
return { section_end <= slice_end, section_end >= slice_end };
}; };
auto add_local_resource = [&](auto& section, u32 address, u16 slice, bool scaling = true) auto add_local_resource = [&](auto& section, u32 address, u16 slice, bool scaling = true) -> std::pair<bool, bool> // [ input fully consumed, output fully covered ]
{ {
// Intersect this resource with the original one. // Intersect this resource with the original one.
// Note that intersection takes place in a normalized coordinate space (bpp = 1) // Note that intersection takes place in a normalized coordinate space (bpp = 1)
@ -364,7 +391,7 @@ namespace rsx
if (!dimensions.width || !dimensions.height) if (!dimensions.width || !dimensions.height)
{ {
// Out of bounds, invalid intersection // Out of bounds, invalid intersection
return; return { false, false };
} }
// The intersection takes place in a normalized coordinate space. Now we convert back to domain-specific // The intersection takes place in a normalized coordinate space. Now we convert back to domain-specific
@ -383,7 +410,7 @@ namespace rsx
if (dst_y >= dst_slice_end || write_section_end <= dst_slice_begin) if (dst_y >= dst_slice_end || write_section_end <= dst_slice_begin)
{ {
// Belongs to a different slice // Belongs to a different slice
return; return { write_section_end <= dst_slice_begin, false };
} }
const u16 dst_w = static_cast<u16>(dst_size.width); const u16 dst_w = static_cast<u16>(dst_size.width);
@ -411,9 +438,10 @@ namespace rsx
.dst_w = _dst_w, .dst_w = _dst_w,
.dst_h = _dst_h .dst_h = _dst_h
}); });
return { write_section_end <= dst_slice_end, write_section_end >= dst_slice_end };
} }
else
{
out.push_back out.push_back
({ ({
.src = section->get_raw_texture(), .src = section->get_raw_texture(),
@ -429,7 +457,8 @@ namespace rsx
.dst_w = dst_w, .dst_w = dst_w,
.dst_h = height .dst_h = height
}); });
}
return { write_section_end <= dst_slice_end, write_section_end >= dst_slice_end };
}; };
u32 current_address = attr.address; u32 current_address = attr.address;
@ -442,34 +471,36 @@ namespace rsx
for (u16 slice = 0; slice < count; ++slice) for (u16 slice = 0; slice < count; ++slice)
{ {
auto num_surface = out.size(); const auto num_surface = out.size();
const auto slice_range = utils::address_range32::start_length(current_address, slice_size);
if (local.empty()) [[likely]] for (auto& e : sort_list)
{ {
for (auto& section : fbos) if (e.index == umax || !slice_range.overlaps(e.bounds))
{ {
add_rtt_resource(section, slice); continue;
} }
}
else if (fbos.empty()) bool remove = false, slice_complete = false;
{
for (auto& section : local)
{
add_local_resource(section, current_address, slice, false);
}
}
else
{
for (const auto& e : sort_list)
{
if (e.list == 0) if (e.list == 0)
{ {
add_rtt_resource(fbos[e.index], slice); std::tie(remove, slice_complete) = add_rtt_resource(fbos[e.index], slice);
} }
else else
{ {
add_local_resource(local[e.index], current_address, slice); std::tie(remove, slice_complete) = add_local_resource(local[e.index], current_address, slice);
} }
if (remove)
{
// If we got here, the section has been fully ingested by the current slice and will never match again.
e.index = umax;
}
if (slice_complete && !unordered_list)
{
// Reached the end of the current slice and we are guaranteed to not match any other section since they lie after this one in memory.
break;
} }
} }