rsx: Speed up slice gathering for large datasets

- We can cut the number of checks done to construct the output objects by an order of magnitude.
- Each sort entry now carries its address bounds, so a slice only tests overlapping candidates, retires fully-consumed sections, and stops scanning early once it is fully covered (when the list is ordered in memory).
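
A minimal sketch of the early-exit scan this enables (illustrative types and names, not the actual rsx:: helpers; the real code sorts by timestamp and only enables the early break when that order is also memory-ordered, via the unordered_list check):

    // Sketch only: assumes entries are sorted by start address.
    #include <cstdint>
    #include <vector>

    struct range32
    {
        uint32_t start, end; // half-open [start, end)
        bool overlaps(const range32& other) const
        {
            return start < other.end && other.start < end;
        }
    };

    // Baseline is O(slices * sections): every slice tests every section.
    // With address-ordered entries, a slice stops at the first entry that
    // begins at or past its end and skips the rest of the list.
    std::vector<uint32_t> gather_slice(const range32& slice, const std::vector<range32>& entries)
    {
        std::vector<uint32_t> hits;
        for (uint32_t i = 0; i < entries.size(); ++i)
        {
            if (entries[i].start >= slice.end)
                break; // everything after this also lies past the slice

            if (slice.overlaps(entries[i]))
                hits.push_back(i);
        }
        return hits;
    }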
kd-11 2025-11-06 11:19:29 +03:00 committed by kd-11
parent 1007aaee4c
commit 864bb9f0db

@@ -262,18 +262,24 @@ namespace rsx
     u64 tag;   // Timestamp
     u32 list;  // List source, 0 = fbo, 1 = local
     u32 index; // Index in list
+    utils::address_range32 bounds;
 };
-std::vector<sort_helper> sort_list;
+const u32 available_slices = fbos.size() + local.size();
+bool unordered_list = false;
+rsx::simple_array<sort_helper> sort_list;
+rsx::simple_array<utils::address_range32> sort_ranges;
+sort_list.reserve(available_slices);
+sort_ranges.reserve(available_slices);
-if (!fbos.empty() && !local.empty())
-{
 // Generate sorting tree if both resources are available and overlapping
-sort_list.reserve(fbos.size() + local.size());
 for (u32 index = 0; index < fbos.size(); ++index)
 {
-    sort_list.push_back({ fbos[index].surface->last_use_tag, 0, index });
+    const auto range = fbos[index].surface->get_memory_range();
+    sort_ranges.push_back(range);
+    sort_list.push_back({ fbos[index].surface->last_use_tag, 0, index, range });
 }
 for (u32 index = 0; index < local.size(); ++index)
@@ -281,13 +287,32 @@
     if (local[index]->get_context() != rsx::texture_upload_context::blit_engine_dst)
         continue;
-    sort_list.push_back({ local[index]->last_write_tag, 1, index });
+    const auto range = local[index]->get_section_range();
+    sort_ranges.push_back(range);
+    sort_list.push_back({ local[index]->last_write_tag, 1, index, range });
 }
-std::sort(sort_list.begin(), sort_list.end(), FN(x.tag < y.tag));
+if (!fbos.empty() && !local.empty())
+{
+    sort_list.sort(FN(x.tag < y.tag));
+}
-auto add_rtt_resource = [&](auto& section, u16 slice)
+// Check if ordered
+for (u32 i = 0; i < sort_list.size(); ++i)
+{
+    if (i == 0)
+    {
+        continue;
+    }
+    if (sort_ranges[i].start < sort_ranges[i - 1].end)
+    {
+        unordered_list = true;
+        break;
+    }
+}
+auto add_rtt_resource = [&](auto& section, u16 slice) -> std::pair<bool, bool> // [ input fully consumed, output fully covered ]
 {
     const u32 slice_begin = (slice * attr.slice_h);
     const u32 slice_end = (slice_begin + attr.height);
@@ -296,7 +321,7 @@
     if (section.dst_area.y >= slice_end || section_end <= slice_begin)
     {
         // Belongs to a different slice
-        return;
+        return { section_end <= slice_begin, false };
     }
     // How much of this slice to read?
@@ -346,9 +371,11 @@
         .dst_w = dst_width,
         .dst_h = dst_height
     });
+    return { section_end <= slice_end, section_end >= slice_end };
 };
-auto add_local_resource = [&](auto& section, u32 address, u16 slice, bool scaling = true)
+auto add_local_resource = [&](auto& section, u32 address, u16 slice, bool scaling = true) -> std::pair<bool, bool> // [ input fully consumed, output fully covered ]
 {
     // Intersect this resource with the original one.
     // Note that intersection takes place in a normalized coordinate space (bpp = 1)
@@ -364,7 +391,7 @@
     if (!dimensions.width || !dimensions.height)
     {
         // Out of bounds, invalid intersection
-        return;
+        return { false, false };
     }
     // The intersection takes place in a normalized coordinate space. Now we convert back to domain-specific
@@ -383,7 +410,7 @@
     if (dst_y >= dst_slice_end || write_section_end <= dst_slice_begin)
     {
         // Belongs to a different slice
-        return;
+        return { write_section_end <= dst_slice_begin, false };
     }
     const u16 dst_w = static_cast<u16>(dst_size.width);
@@ -411,9 +438,10 @@
             .dst_w = _dst_w,
             .dst_h = _dst_h
         });
+        return { write_section_end <= dst_slice_end, write_section_end >= dst_slice_end };
     }
     else
     {
         out.push_back
         ({
             .src = section->get_raw_texture(),
@@ -429,7 +457,8 @@
             .dst_w = dst_w,
             .dst_h = height
         });
     }
+    return { write_section_end <= dst_slice_end, write_section_end >= dst_slice_end };
 };
 u32 current_address = attr.address;
@@ -442,34 +471,36 @@
 for (u16 slice = 0; slice < count; ++slice)
 {
-    auto num_surface = out.size();
+    const auto num_surface = out.size();
+    const auto slice_range = utils::address_range32::start_length(current_address, slice_size);
-    if (local.empty()) [[likely]]
+    for (auto& e : sort_list)
     {
-        for (auto& section : fbos)
+        if (e.index == umax || !slice_range.overlaps(e.bounds))
         {
-            add_rtt_resource(section, slice);
+            continue;
         }
-    }
-    else if (fbos.empty())
-    {
-        for (auto& section : local)
-        {
-            add_local_resource(section, current_address, slice, false);
-        }
-    }
-    else
-    {
-        for (const auto& e : sort_list)
-        {
+        bool remove = false, slice_complete = false;
         if (e.list == 0)
         {
-            add_rtt_resource(fbos[e.index], slice);
+            std::tie(remove, slice_complete) = add_rtt_resource(fbos[e.index], slice);
         }
         else
        {
-            add_local_resource(local[e.index], current_address, slice);
+            std::tie(remove, slice_complete) = add_local_resource(local[e.index], current_address, slice);
         }
+        if (remove)
+        {
+            // If we got here, the section has been fully ingested by the current slice and will never match again.
+            e.index = umax;
+        }
+        if (slice_complete && !unordered_list)
+        {
+            // Reached the end of the current slice and we are guaranteed to not match any other section since they lie after this one in memory.
+            break;
+        }
     }
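
A sketch of the { consumed, covered } protocol driving the loop above, with simplified stand-in types ('process' plays the role of add_rtt_resource / add_local_resource; none of these names are from the actual code):

    #include <cstdint>
    #include <functional>
    #include <utility>
    #include <vector>

    constexpr uint32_t invalid_index = UINT32_MAX; // stand-in for umax

    struct entry
    {
        uint32_t index; // slot in the source list; poisoned once consumed
    };

    void gather(std::vector<entry>& sort_list, bool unordered_list,
                const std::function<std::pair<bool, bool>(entry&)>& process)
    {
        for (auto& e : sort_list)
        {
            if (e.index == invalid_index)
            {
                continue; // fully ingested by an earlier slice, never matches again
            }

            // [ input fully consumed, output fully covered ]
            const auto [consumed, covered] = process(e);

            if (consumed)
            {
                e.index = invalid_index; // drop from every later slice
            }

            if (covered && !unordered_list)
            {
                break; // ordered list: all remaining entries lie after this slice
            }
        }
    }

Both flags are computed from the section's end offset relative to the slice bounds, which is why the lambdas now return std::pair<bool, bool> instead of void.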