mirror of
https://github.com/RPCS3/rpcs3.git
synced 2026-03-10 15:38:08 +01:00
rsx/gl: Accelerate blit using texture cache for storage
This commit is contained in:
parent
85f80cfbd1
commit
5928b78c45
|
|
@ -692,15 +692,6 @@ bool GLGSRender::load_program()
|
|||
else
|
||||
surface = m_rtts.get_texture_from_depth_stencil_if_applicable(texaddr);
|
||||
|
||||
if (!surface)
|
||||
{
|
||||
auto rsc = m_rtts.get_surface_subresource_if_applicable(texaddr, 0, 0, tex.pitch());
|
||||
if (!rsc.surface || rsc.is_depth_surface != is_depth)
|
||||
return std::make_tuple(false, 0);
|
||||
|
||||
surface = rsc.surface;
|
||||
}
|
||||
|
||||
return std::make_tuple(true, surface->get_native_pitch());
|
||||
};
|
||||
|
||||
|
|
@ -972,5 +963,5 @@ void GLGSRender::synchronize_buffers()
|
|||
|
||||
bool GLGSRender::scaled_image_from_memory(rsx::blit_src_info& src, rsx::blit_dst_info& dst, bool interpolate)
|
||||
{
|
||||
return m_gl_texture_cache.upload_scaled_image(src, dst, interpolate);
|
||||
return m_gl_texture_cache.upload_scaled_image(src, dst, interpolate, m_rtts);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -116,7 +116,9 @@ namespace gl
|
|||
}
|
||||
|
||||
// For an address within the texture, extract this sub-section's rect origin
|
||||
std::tuple<bool, u16, u16> get_texture_subresource(u32 offset)
|
||||
// Checks whether we need to scale the subresource if it is not handled in shader
|
||||
// NOTE1: When surface->real_pitch < rsx_pitch, the surface is assumed to have been scaled to fill the rsx_region
|
||||
std::tuple<bool, u16, u16> get_texture_subresource(u32 offset, bool scale_to_fit)
|
||||
{
|
||||
if (!offset)
|
||||
{
|
||||
|
|
@ -132,9 +134,14 @@ namespace gl
|
|||
if (!surface_pixel_size)
|
||||
surface_pixel_size = native_pitch / surface_width;
|
||||
|
||||
u32 pixel_offset = (offset / surface_pixel_size);
|
||||
u32 y = (pixel_offset / surface_width);
|
||||
u32 x = (pixel_offset % surface_width);
|
||||
const u32 y = (offset / rsx_pitch);
|
||||
u32 x = (offset % rsx_pitch) / surface_pixel_size;
|
||||
|
||||
if (scale_to_fit)
|
||||
{
|
||||
const f32 x_scale = (f32)rsx_pitch / native_pitch;
|
||||
x = (u32)((f32)x / x_scale);
|
||||
}
|
||||
|
||||
return std::make_tuple(true, (u16)x, (u16)y);
|
||||
}
|
||||
|
|
@ -302,7 +309,7 @@ struct surface_subresource
|
|||
class gl_render_targets : public rsx::surface_store<gl_render_target_traits>
|
||||
{
|
||||
private:
|
||||
bool surface_overlaps(gl::render_target *surface, u32 surface_address, u32 texaddr, u16 *x, u16 *y)
|
||||
bool surface_overlaps(gl::render_target *surface, u32 surface_address, u32 texaddr, u16 *x, u16 *y, bool scale_to_fit)
|
||||
{
|
||||
bool is_subslice = false;
|
||||
u16 x_offset = 0;
|
||||
|
|
@ -314,7 +321,7 @@ private:
|
|||
u32 offset = texaddr - surface_address;
|
||||
if (offset >= 0)
|
||||
{
|
||||
std::tie(is_subslice, x_offset, y_offset) = surface->get_texture_subresource(offset);
|
||||
std::tie(is_subslice, x_offset, y_offset) = surface->get_texture_subresource(offset, scale_to_fit);
|
||||
if (is_subslice)
|
||||
{
|
||||
*x = x_offset;
|
||||
|
|
@ -354,7 +361,7 @@ private:
|
|||
}
|
||||
|
||||
public:
|
||||
surface_subresource get_surface_subresource_if_applicable(u32 texaddr, u16 requested_width, u16 requested_height, u16 requested_pitch)
|
||||
surface_subresource get_surface_subresource_if_applicable(u32 texaddr, u16 requested_width, u16 requested_height, u16 requested_pitch, bool scale_to_fit =false)
|
||||
{
|
||||
gl::render_target *surface = nullptr;
|
||||
bool is_subslice = false;
|
||||
|
|
@ -366,17 +373,31 @@ public:
|
|||
u32 this_address = std::get<0>(tex_info);
|
||||
surface = std::get<1>(tex_info).get();
|
||||
|
||||
if (surface_overlaps(surface, this_address, texaddr, &x_offset, &y_offset))
|
||||
if (surface_overlaps(surface, this_address, texaddr, &x_offset, &y_offset, scale_to_fit))
|
||||
{
|
||||
if (surface->get_rsx_pitch() != requested_pitch)
|
||||
continue;
|
||||
|
||||
auto dims = surface->get_dimensions();
|
||||
|
||||
|
||||
if (scale_to_fit)
|
||||
{
|
||||
f32 pitch_scaling = (f32)requested_pitch / surface->get_native_pitch();
|
||||
requested_width /= pitch_scaling;
|
||||
}
|
||||
|
||||
if (fits(surface, dims, x_offset, y_offset, requested_width, requested_height))
|
||||
return{ surface, x_offset, y_offset, requested_width, requested_height, is_bound(this_address, false), false };
|
||||
else
|
||||
{
|
||||
if (scale_to_fit) //Forcefully fit the requested region by clipping and scaling
|
||||
{
|
||||
u16 remaining_width = dims.first - x_offset;
|
||||
u16 remaining_height = dims.second - y_offset;
|
||||
|
||||
return{ surface, x_offset, y_offset, remaining_width, remaining_height, is_bound(this_address, false), false };
|
||||
}
|
||||
|
||||
if (dims.first >= requested_width && dims.second >= requested_height)
|
||||
{
|
||||
LOG_WARNING(RSX, "Overlapping surface exceeds bounds; returning full surface region");
|
||||
|
|
@ -392,17 +413,31 @@ public:
|
|||
u32 this_address = std::get<0>(tex_info);
|
||||
surface = std::get<1>(tex_info).get();
|
||||
|
||||
if (surface_overlaps(surface, this_address, texaddr, &x_offset, &y_offset))
|
||||
if (surface_overlaps(surface, this_address, texaddr, &x_offset, &y_offset, scale_to_fit))
|
||||
{
|
||||
if (surface->get_rsx_pitch() != requested_pitch)
|
||||
continue;
|
||||
|
||||
auto dims = surface->get_dimensions();
|
||||
|
||||
if (scale_to_fit)
|
||||
{
|
||||
f32 pitch_scaling = (f32)requested_pitch / surface->get_native_pitch();
|
||||
requested_width /= pitch_scaling;
|
||||
}
|
||||
|
||||
if (fits(surface, dims, x_offset, y_offset, requested_width, requested_height))
|
||||
return{ surface, x_offset, y_offset, requested_width, requested_height, is_bound(this_address, true), true };
|
||||
else
|
||||
{
|
||||
if (scale_to_fit) //Forcefully fit the requested region by clipping and scaling
|
||||
{
|
||||
u16 remaining_width = dims.first - x_offset;
|
||||
u16 remaining_height = dims.second - y_offset;
|
||||
|
||||
return{ surface, x_offset, y_offset, remaining_width, remaining_height, is_bound(this_address, false), false };
|
||||
}
|
||||
|
||||
if (dims.first >= requested_width && dims.second >= requested_height)
|
||||
{
|
||||
LOG_WARNING(RSX, "Overlapping depth surface exceeds bounds; returning full surface region");
|
||||
|
|
|
|||
|
|
@ -496,6 +496,10 @@ namespace gl
|
|||
glTexStorage2D(GL_TEXTURE_2D, 1, GL_RGB565, dst_dims.width, dst_dims.height);
|
||||
}
|
||||
|
||||
/* LOG_ERROR(RSX, "First pass blit, copy %d,%d, %dx%d -> %d,%d, %dx%d",
|
||||
src_rect.x1, src_rect.y1, src_rect.x2-src_rect.x1, src_rect.y2-src_rect.y1,
|
||||
dst_rect.x1, dst_rect.y1, dst_rect.x2 - dst_rect.x1, dst_rect.y2 - dst_rect.y1); */
|
||||
|
||||
if (is_argb8)
|
||||
{
|
||||
blit_src.blit(fbo_argb8, src_rect, dst_rect);
|
||||
|
|
@ -507,6 +511,10 @@ namespace gl
|
|||
src_surface = rgb565_surface;
|
||||
}
|
||||
|
||||
/* LOG_ERROR(RSX, "Copy %d,%d %dx%d from blit surface to dest @%dx%d",
|
||||
clip_offset.x, clip_offset.y, clip_dims.width, clip_dims.height,
|
||||
dst_offset.x, dst_offset.y); */
|
||||
|
||||
glCopyImageSubData(src_surface, GL_TEXTURE_2D, 0, clip_offset.x, clip_offset.y, 0,
|
||||
dst_tex, GL_TEXTURE_2D, 0, dst_offset.x, dst_offset.y, 0, clip_dims.width, clip_dims.height, 1);
|
||||
|
||||
|
|
@ -746,7 +754,7 @@ namespace gl
|
|||
* a bound render target. We can bypass the expensive download in this case
|
||||
*/
|
||||
|
||||
surface_subresource rsc = m_rtts.get_surface_subresource_if_applicable(texaddr, tex.width(), tex.height(), tex.pitch());
|
||||
surface_subresource rsc = m_rtts.get_surface_subresource_if_applicable(texaddr, tex.width(), tex.height(), tex.pitch(), true);
|
||||
if (rsc.surface)
|
||||
{
|
||||
//Check that this region is not cpu-dirty before doing a copy
|
||||
|
|
@ -1046,13 +1054,13 @@ namespace gl
|
|||
m_temporary_surfaces.clear();
|
||||
}
|
||||
|
||||
bool upload_scaled_image(rsx::blit_src_info& src, rsx::blit_dst_info& dst, bool interpolate)
|
||||
bool upload_scaled_image(rsx::blit_src_info& src, rsx::blit_dst_info& dst, bool interpolate, gl_render_targets &m_rtts)
|
||||
{
|
||||
if (dst.swizzled)
|
||||
return false;
|
||||
|
||||
u32 tmp_tex = 0;
|
||||
|
||||
bool src_is_render_target = false; //TODO
|
||||
bool dst_is_render_target = false; //TODO
|
||||
bool dst_is_argb8 = (dst.format == rsx::blit_engine::transfer_destination_format::a8r8g8b8);
|
||||
bool src_is_argb8 = (src.format == rsx::blit_engine::transfer_source_format::a8r8g8b8);
|
||||
|
||||
|
|
@ -1060,63 +1068,121 @@ namespace gl
|
|||
GLenum src_gl_format = src_is_argb8 ? GL_BGRA : GL_RGB;
|
||||
GLenum src_gl_type = src_is_argb8? GL_UNSIGNED_INT_8_8_8_8: GL_UNSIGNED_SHORT_5_6_5;
|
||||
|
||||
if (g_cfg_rsx_write_color_buffers || g_cfg_rsx_write_depth_buffer)
|
||||
{
|
||||
//Invalidate source if we are blitting from an RTT
|
||||
flush_section((u32)((u64)src.pixels - (u64)vm::base(0)));
|
||||
}
|
||||
u32 source_texture = 0;
|
||||
u32 dest_texture = 0;
|
||||
|
||||
glGenTextures(1, &tmp_tex);
|
||||
glBindTexture(GL_TEXTURE_2D, tmp_tex);
|
||||
glTexStorage2D(GL_TEXTURE_2D, 1, src_gl_sized_format, src.width, src.slice_h);
|
||||
glPixelStorei(GL_UNPACK_ROW_LENGTH, src.pitch / (src_is_argb8? 4: 2));
|
||||
glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
|
||||
glPixelStorei(GL_UNPACK_SWAP_BYTES, !src_is_argb8);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
|
||||
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, src.width, src.slice_h, src_gl_format, src_gl_type, src.pixels);
|
||||
const u32 src_address = (u32)((u64)src.pixels - (u64)vm::base(0));
|
||||
const u32 dst_address = (u32)((u64)dst.pixels - (u64)vm::base(0));
|
||||
|
||||
//Check if src/dst are parts of render targets
|
||||
surface_subresource src_subres = m_rtts.get_surface_subresource_if_applicable(src_address, src.width, src.slice_h, src.pitch, true);
|
||||
src_is_render_target = src_subres.surface != nullptr;
|
||||
|
||||
float scale_x = (f32)dst.width / src.width;
|
||||
float scale_y = (f32)dst.height / src.height;
|
||||
|
||||
const position2i clip_offset = {dst.clip_x, dst.clip_y};
|
||||
position2i dst_offset = {dst.offset_x, dst.offset_y};
|
||||
position2i clip_offset = { dst.clip_x, dst.clip_y };
|
||||
position2i dst_offset = { dst.offset_x, dst.offset_y };
|
||||
|
||||
const size2i clip_dimensions = { dst.clip_width, dst.clip_height};
|
||||
const size2i dst_dimensions = { dst.pitch/(dst_is_argb8? 4: 2), dst.height };
|
||||
size2i clip_dimensions = { dst.clip_width, dst.clip_height };
|
||||
const size2i dst_dimensions = { dst.pitch / (dst_is_argb8 ? 4 : 2), dst.height };
|
||||
|
||||
//Offset in x and y for src is 0 (it is already accounted for when getting pixels_src)
|
||||
//Reproject final clip onto source...
|
||||
const u16 src_w = clip_dimensions.width / scale_x;
|
||||
const u16 src_h = clip_dimensions.height / scale_y;
|
||||
|
||||
const areai src_area = { 0, 0, src_w, src_h };
|
||||
const areai dst_area = { 0, 0, dst.clip_width, dst.clip_height };
|
||||
areai src_area = { 0, 0, src_w, src_h };
|
||||
areai dst_area = { 0, 0, dst.clip_width, dst.clip_height };
|
||||
|
||||
auto old_cached_texture = find_texture(dst.rsx_address, dst.pitch * dst.clip_height);
|
||||
u32 dst_surface = 0;
|
||||
|
||||
if (old_cached_texture)
|
||||
//Create source texture if it does not exist
|
||||
if (!src_is_render_target)
|
||||
{
|
||||
dst_surface = old_cached_texture->id();
|
||||
|
||||
const u32 address_offset = dst.rsx_address - old_cached_texture->get_section_base();
|
||||
auto preloaded_texture = find_texture(src_address, src.width, src.slice_h, 1);
|
||||
|
||||
const u16 bpp = dst_is_argb8 ? 4 : 2;
|
||||
const u16 offset_y = address_offset / dst.pitch;
|
||||
const u16 offset_x = address_offset % dst.pitch;
|
||||
if (preloaded_texture != nullptr)
|
||||
{
|
||||
source_texture = preloaded_texture->id();
|
||||
}
|
||||
else
|
||||
{
|
||||
flush_section(src_address);
|
||||
|
||||
dst_offset.x += offset_x / bpp;
|
||||
dst_offset.y += offset_y / bpp;
|
||||
glGenTextures(1, &source_texture);
|
||||
glBindTexture(GL_TEXTURE_2D, source_texture);
|
||||
glTexStorage2D(GL_TEXTURE_2D, 1, src_gl_sized_format, src.width, src.slice_h);
|
||||
glPixelStorei(GL_UNPACK_ROW_LENGTH, src.pitch / (src_is_argb8 ? 4 : 2));
|
||||
glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
|
||||
glPixelStorei(GL_UNPACK_SWAP_BYTES, !src_is_argb8);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
|
||||
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, src.width, src.slice_h, src_gl_format, src_gl_type, src.pixels);
|
||||
|
||||
std::lock_guard<std::mutex> lock(m_section_mutex);
|
||||
|
||||
auto section = create_texture(source_texture, src_address, src.pitch * src.slice_h, src.width, src.slice_h, 1);
|
||||
section.protect(utils::protection::ro);
|
||||
section.set_dirty(false);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (src_subres.w != clip_dimensions.width ||
|
||||
src_subres.h != clip_dimensions.height)
|
||||
{
|
||||
f32 subres_scaling_x = (f32)src.pitch / src_subres.surface->get_native_pitch();
|
||||
|
||||
dst_area.x2 = (src_subres.w * scale_x * subres_scaling_x);
|
||||
dst_area.y2 = (src_subres.h * scale_y);
|
||||
}
|
||||
|
||||
src_area.x2 = src_subres.w;
|
||||
src_area.y2 = src_subres.h;
|
||||
|
||||
src_area.x1 += src_subres.x;
|
||||
src_area.x2 += src_subres.x;
|
||||
src_area.y1 += src_subres.y;
|
||||
src_area.y2 += src_subres.y;
|
||||
|
||||
source_texture = src_subres.surface->id();
|
||||
}
|
||||
|
||||
u32 texture_id = m_hw_blitter.scale_image(tmp_tex, dst_surface, src_area, dst_area, dst_offset, clip_offset, dst_dimensions, clip_dimensions, dst_is_argb8);
|
||||
glDeleteTextures(1, &tmp_tex);
|
||||
surface_subresource dst_subres = m_rtts.get_surface_subresource_if_applicable(dst_address, dst.width, dst.clip_height, dst.pitch, true);
|
||||
dst_is_render_target = dst_subres.surface != nullptr;
|
||||
|
||||
if (!dst_is_render_target)
|
||||
{
|
||||
auto cached_dest = find_texture(dst.rsx_address, dst.pitch * dst.clip_height);
|
||||
|
||||
if (cached_dest)
|
||||
{
|
||||
dest_texture = cached_dest->id();
|
||||
|
||||
//TODO: Move this algo into utils since it is used a lot
|
||||
const u32 address_offset = dst.rsx_address - cached_dest->get_section_base();
|
||||
|
||||
const u16 bpp = dst_is_argb8 ? 4 : 2;
|
||||
const u16 offset_y = address_offset / dst.pitch;
|
||||
const u16 offset_x = address_offset % dst.pitch;
|
||||
|
||||
dst_offset.x += offset_x / bpp;
|
||||
dst_offset.y += offset_y;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
dst_offset.x = dst_subres.x;
|
||||
dst_offset.y = dst_subres.y;
|
||||
|
||||
dest_texture = dst_subres.surface->id();
|
||||
}
|
||||
|
||||
u32 texture_id = m_hw_blitter.scale_image(source_texture, dest_texture, src_area, dst_area, dst_offset, clip_offset, dst_dimensions, clip_dimensions, dst_is_argb8);
|
||||
|
||||
/* LOG_ERROR(RSX, "SIFM: address=0x%X + 0x%X, x=%d(%d), y=%d(%d), w=%d(%d), h=%d(%d)", dst.rsx_address, dst.pitch * dst.height,
|
||||
dst.offset_x, dst.clip_x, dst.offset_y, dst.clip_y, dst.width, dst.clip_width, dst.height, dst.clip_height); */
|
||||
|
||||
if (dst_surface)
|
||||
if (dest_texture)
|
||||
return true;
|
||||
|
||||
std::lock_guard<std::mutex> lock(m_section_mutex);
|
||||
|
|
|
|||
|
|
@ -41,6 +41,8 @@ namespace rsx
|
|||
u16 slice_h;
|
||||
u16 pitch;
|
||||
void *pixels;
|
||||
|
||||
u32 rsx_address;
|
||||
};
|
||||
|
||||
struct blit_dst_info
|
||||
|
|
|
|||
|
|
@ -559,6 +559,7 @@ namespace rsx
|
|||
src_info.offset_x = in_x;
|
||||
src_info.offset_y = in_y;
|
||||
src_info.pixels = pixels_src;
|
||||
src_info.rsx_address = get_address(src_offset, src_dma);
|
||||
|
||||
dst_info.format = dst_color_format;
|
||||
dst_info.width = convert_w;
|
||||
|
|
|
|||
|
|
@ -69,7 +69,7 @@
|
|||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release - LLVM|x64'">
|
||||
<ClCompile>
|
||||
<AdditionalIncludeDirectories>..\rsx_program_decompiler\rsx_decompiler;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||
<Optimization>Full</Optimization>
|
||||
<Optimization>Disabled</Optimization>
|
||||
</ClCompile>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||
|
|
|
|||
Loading…
Reference in a new issue