mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-12-06 07:12:28 +01:00
rsx: Speed up slice gathering for large datasets
- We can cut down by an order of magnitude the amount of checks we do to construct objects
This commit is contained in:
parent
1007aaee4c
commit
864bb9f0db
|
|
@ -262,32 +262,57 @@ namespace rsx
|
||||||
u64 tag; // Timestamp
|
u64 tag; // Timestamp
|
||||||
u32 list; // List source, 0 = fbo, 1 = local
|
u32 list; // List source, 0 = fbo, 1 = local
|
||||||
u32 index; // Index in list
|
u32 index; // Index in list
|
||||||
|
|
||||||
|
utils::address_range32 bounds;
|
||||||
};
|
};
|
||||||
|
|
||||||
std::vector<sort_helper> sort_list;
|
const u32 available_slices = fbos.size() + local.size();
|
||||||
|
bool unordered_list = false;
|
||||||
|
|
||||||
|
rsx::simple_array<sort_helper> sort_list;
|
||||||
|
rsx::simple_array<utils::address_range32> sort_ranges;
|
||||||
|
sort_list.reserve(available_slices);
|
||||||
|
sort_ranges.reserve(available_slices);
|
||||||
|
|
||||||
|
// Generate sorting tree if both resources are available and overlapping
|
||||||
|
for (u32 index = 0; index < fbos.size(); ++index)
|
||||||
|
{
|
||||||
|
const auto range = fbos[index].surface->get_memory_range();
|
||||||
|
sort_ranges.push_back(range);
|
||||||
|
sort_list.push_back({ fbos[index].surface->last_use_tag, 0, index, range });
|
||||||
|
}
|
||||||
|
|
||||||
|
for (u32 index = 0; index < local.size(); ++index)
|
||||||
|
{
|
||||||
|
if (local[index]->get_context() != rsx::texture_upload_context::blit_engine_dst)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
const auto range = local[index]->get_section_range();
|
||||||
|
sort_ranges.push_back(range);
|
||||||
|
sort_list.push_back({ local[index]->last_write_tag, 1, index, range });
|
||||||
|
}
|
||||||
|
|
||||||
if (!fbos.empty() && !local.empty())
|
if (!fbos.empty() && !local.empty())
|
||||||
{
|
{
|
||||||
// Generate sorting tree if both resources are available and overlapping
|
sort_list.sort(FN(x.tag < y.tag));
|
||||||
sort_list.reserve(fbos.size() + local.size());
|
|
||||||
|
|
||||||
for (u32 index = 0; index < fbos.size(); ++index)
|
|
||||||
{
|
|
||||||
sort_list.push_back({ fbos[index].surface->last_use_tag, 0, index });
|
|
||||||
}
|
|
||||||
|
|
||||||
for (u32 index = 0; index < local.size(); ++index)
|
|
||||||
{
|
|
||||||
if (local[index]->get_context() != rsx::texture_upload_context::blit_engine_dst)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
sort_list.push_back({ local[index]->last_write_tag, 1, index });
|
|
||||||
}
|
|
||||||
|
|
||||||
std::sort(sort_list.begin(), sort_list.end(), FN(x.tag < y.tag));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
auto add_rtt_resource = [&](auto& section, u16 slice)
|
// Check if ordered
|
||||||
|
for (u32 i = 0; i < sort_list.size(); ++i)
|
||||||
|
{
|
||||||
|
if (i == 0)
|
||||||
|
{
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (sort_ranges[i].start < sort_ranges[i - 1].end)
|
||||||
|
{
|
||||||
|
unordered_list = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
auto add_rtt_resource = [&](auto& section, u16 slice) -> std::pair<bool, bool> // [ input fully consumed, output fully covered ]
|
||||||
{
|
{
|
||||||
const u32 slice_begin = (slice * attr.slice_h);
|
const u32 slice_begin = (slice * attr.slice_h);
|
||||||
const u32 slice_end = (slice_begin + attr.height);
|
const u32 slice_end = (slice_begin + attr.height);
|
||||||
|
|
@ -296,7 +321,7 @@ namespace rsx
|
||||||
if (section.dst_area.y >= slice_end || section_end <= slice_begin)
|
if (section.dst_area.y >= slice_end || section_end <= slice_begin)
|
||||||
{
|
{
|
||||||
// Belongs to a different slice
|
// Belongs to a different slice
|
||||||
return;
|
return { section_end <= slice_begin, false };
|
||||||
}
|
}
|
||||||
|
|
||||||
// How much of this slice to read?
|
// How much of this slice to read?
|
||||||
|
|
@ -346,9 +371,11 @@ namespace rsx
|
||||||
.dst_w = dst_width,
|
.dst_w = dst_width,
|
||||||
.dst_h = dst_height
|
.dst_h = dst_height
|
||||||
});
|
});
|
||||||
|
|
||||||
|
return { section_end <= slice_end, section_end >= slice_end };
|
||||||
};
|
};
|
||||||
|
|
||||||
auto add_local_resource = [&](auto& section, u32 address, u16 slice, bool scaling = true)
|
auto add_local_resource = [&](auto& section, u32 address, u16 slice, bool scaling = true) -> std::pair<bool, bool> // [ input fully consumed, output fully covered ]
|
||||||
{
|
{
|
||||||
// Intersect this resource with the original one.
|
// Intersect this resource with the original one.
|
||||||
// Note that intersection takes place in a normalized coordinate space (bpp = 1)
|
// Note that intersection takes place in a normalized coordinate space (bpp = 1)
|
||||||
|
|
@ -364,7 +391,7 @@ namespace rsx
|
||||||
if (!dimensions.width || !dimensions.height)
|
if (!dimensions.width || !dimensions.height)
|
||||||
{
|
{
|
||||||
// Out of bounds, invalid intersection
|
// Out of bounds, invalid intersection
|
||||||
return;
|
return { false, false };
|
||||||
}
|
}
|
||||||
|
|
||||||
// The intersection takes place in a normalized coordinate space. Now we convert back to domain-specific
|
// The intersection takes place in a normalized coordinate space. Now we convert back to domain-specific
|
||||||
|
|
@ -383,7 +410,7 @@ namespace rsx
|
||||||
if (dst_y >= dst_slice_end || write_section_end <= dst_slice_begin)
|
if (dst_y >= dst_slice_end || write_section_end <= dst_slice_begin)
|
||||||
{
|
{
|
||||||
// Belongs to a different slice
|
// Belongs to a different slice
|
||||||
return;
|
return { write_section_end <= dst_slice_begin, false };
|
||||||
}
|
}
|
||||||
|
|
||||||
const u16 dst_w = static_cast<u16>(dst_size.width);
|
const u16 dst_w = static_cast<u16>(dst_size.width);
|
||||||
|
|
@ -411,25 +438,27 @@ namespace rsx
|
||||||
.dst_w = _dst_w,
|
.dst_w = _dst_w,
|
||||||
.dst_h = _dst_h
|
.dst_h = _dst_h
|
||||||
});
|
});
|
||||||
|
|
||||||
|
return { write_section_end <= dst_slice_end, write_section_end >= dst_slice_end };
|
||||||
}
|
}
|
||||||
else
|
|
||||||
{
|
out.push_back
|
||||||
out.push_back
|
({
|
||||||
({
|
.src = section->get_raw_texture(),
|
||||||
.src = section->get_raw_texture(),
|
.xform = surface_transform::identity,
|
||||||
.xform = surface_transform::identity,
|
.level = 0,
|
||||||
.level = 0,
|
.src_x = static_cast<u16>(src_offset.x), // src.x
|
||||||
.src_x = static_cast<u16>(src_offset.x), // src.x
|
.src_y = static_cast<u16>(src_offset.y), // src.y
|
||||||
.src_y = static_cast<u16>(src_offset.y), // src.y
|
.dst_x = static_cast<u16>(dst_offset.x), // dst.x
|
||||||
.dst_x = static_cast<u16>(dst_offset.x), // dst.x
|
.dst_y = static_cast<u16>(dst_y - dst_slice_begin), // dst.y
|
||||||
.dst_y = static_cast<u16>(dst_y - dst_slice_begin), // dst.y
|
.dst_z = 0,
|
||||||
.dst_z = 0,
|
.src_w = src_w,
|
||||||
.src_w = src_w,
|
.src_h = height,
|
||||||
.src_h = height,
|
.dst_w = dst_w,
|
||||||
.dst_w = dst_w,
|
.dst_h = height
|
||||||
.dst_h = height
|
});
|
||||||
});
|
|
||||||
}
|
return { write_section_end <= dst_slice_end, write_section_end >= dst_slice_end };
|
||||||
};
|
};
|
||||||
|
|
||||||
u32 current_address = attr.address;
|
u32 current_address = attr.address;
|
||||||
|
|
@ -442,34 +471,36 @@ namespace rsx
|
||||||
|
|
||||||
for (u16 slice = 0; slice < count; ++slice)
|
for (u16 slice = 0; slice < count; ++slice)
|
||||||
{
|
{
|
||||||
auto num_surface = out.size();
|
const auto num_surface = out.size();
|
||||||
|
const auto slice_range = utils::address_range32::start_length(current_address, slice_size);
|
||||||
|
|
||||||
if (local.empty()) [[likely]]
|
for (auto& e : sort_list)
|
||||||
{
|
{
|
||||||
for (auto& section : fbos)
|
if (e.index == umax || !slice_range.overlaps(e.bounds))
|
||||||
{
|
{
|
||||||
add_rtt_resource(section, slice);
|
continue;
|
||||||
}
|
}
|
||||||
}
|
|
||||||
else if (fbos.empty())
|
bool remove = false, slice_complete = false;
|
||||||
{
|
if (e.list == 0)
|
||||||
for (auto& section : local)
|
|
||||||
{
|
{
|
||||||
add_local_resource(section, current_address, slice, false);
|
std::tie(remove, slice_complete) = add_rtt_resource(fbos[e.index], slice);
|
||||||
}
|
}
|
||||||
}
|
else
|
||||||
else
|
|
||||||
{
|
|
||||||
for (const auto& e : sort_list)
|
|
||||||
{
|
{
|
||||||
if (e.list == 0)
|
std::tie(remove, slice_complete) = add_local_resource(local[e.index], current_address, slice);
|
||||||
{
|
}
|
||||||
add_rtt_resource(fbos[e.index], slice);
|
|
||||||
}
|
if (remove)
|
||||||
else
|
{
|
||||||
{
|
// If we got here, the section has been fully ingested by the current slice and will never match again.
|
||||||
add_local_resource(local[e.index], current_address, slice);
|
e.index = umax;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (slice_complete && !unordered_list)
|
||||||
|
{
|
||||||
|
// Reached the end of the current slice and we are guaranteed to not match any other section since they lie after this one in memory.
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue