#include "stdafx.h"
#include "Emu/RSX/RSXThread.h"
#include "GLTexture.h"
#include "GLTextureCache.h"
#include "../Common/BufferUtils.h"

#include "util/asm.hpp"

namespace gl
{
	// Packs the creation properties of a temporary image into one 64-bit key.
	// Keys are only compared against other keys produced by this function,
	// so the bit layout is private to this translation unit.
	//
	// Every field gets enough bits for the range the checks below allow; a value
	// that does not fit its field would alias a different image's key.
	static u64 encode_properties(GLenum sized_internal_fmt, GLenum target, u16 width, u16 height, u16 depth, u8 mipmaps)
	{
		// Generate cache key
		// 00..13 = width
		// 14..27 = height
		// 28..41 = depth
		// 42..45 = mipmaps
		// 46..47 = type
		// 48..63 = format
		ensure(((width | height) & ~0x3fff) == 0, "Image dimensions are too large - lower your resolution scale.");
		ensure((depth & ~0x3fff) == 0);
		ensure(mipmaps <= 13);

		// The format occupies the top 16 bits; anything wider would be shifted out silently
		ensure((static_cast<u64>(sized_internal_fmt) & ~0xffffull) == 0);

		GLuint target_encoding = 0;
		switch (target)
		{
		case GL_TEXTURE_1D:
			target_encoding = 0;
			break;
		case GL_TEXTURE_2D:
			target_encoding = 1;
			break;
		case GL_TEXTURE_3D:
			target_encoding = 2;
			break;
		case GL_TEXTURE_CUBE_MAP:
			target_encoding = 3;
			break;
		default:
			fmt::throw_exception("Unsupported destination target 0x%x", target);
		}

		const u64 key =
			(static_cast<u64>(width) << 0) |
			(static_cast<u64>(height) << 14) |
			(static_cast<u64>(depth) << 28) |
			(static_cast<u64>(mipmaps) << 42) |
			(static_cast<u64>(target_encoding) << 46) |
			(static_cast<u64>(sized_internal_fmt) << 48);

		return key;
	}

	// Completes a flush: unmaps the PBO, then post-processes, in place, the data
	// located at the section base plus the confirmed range offset.
	//  - Without compute shader support, D24S8 data is dword-swapped here on the CPU.
	//  - Swizzled sections are converted through a temporary copy of the data.
	void cached_texture_section::finish_flush()
	{
		// Free resources
		pbo.unmap();

		const auto valid_range = get_confirmed_range_delta();
		const u32 valid_offset = valid_range.first;
		const u32 valid_length = valid_range.second;
		void* dst = get_ptr(get_section_base() + valid_offset);

		if (!gl::get_driver_caps().ARB_compute_shader_supported)
		{
			switch (type)
			{
			case gl::texture::type::sbyte:
			case gl::texture::type::ubyte:
			{
				// byte swapping does not work on byte types, use uint_8_8_8_8 for rgba8 instead to avoid penalty
				ensure(!pack_unpack_swap_bytes);
				break;
			}
			case gl::texture::type::uint_24_8:
			{
				// Swap bytes on D24S8 does not swap the whole dword, just shuffles the 3 bytes for D24
				// In this regard, D24S8 is the same structure on both PC and PS3, but the endianness of the whole block is reversed on PS3
				ensure(pack_unpack_swap_bytes == false);
				ensure(real_pitch == (width * 4));

				if (rsx_pitch == real_pitch) [[likely]]
				{
					// Rows are tightly packed, swap the whole valid range in one pass
					copy_data_swap_u32(static_cast<u32*>(dst), static_cast<u32*>(dst), valid_length / 4);
				}
				else
				{
					// Padded rows: swap only the 'width' dwords of each row and step by the padded pitch
					const u32 num_rows = utils::align(valid_length, rsx_pitch) / rsx_pitch;
					u32* data = static_cast<u32*>(dst);

					for (u32 row = 0; row < num_rows; ++row)
					{
						copy_data_swap_u32(data, data, width);
						data += rsx_pitch / 4;
					}
				}
				break;
			}
			default:
				break;
			}
		}

		if (is_swizzled())
		{
			// This format is completely worthless to CPU processing algorithms where cache lines on die are linear.
			// If this is happening, usually it means it was not a planned readback (e.g shared pages situation)
			rsx_log.warning("[Performance warning] CPU readback of swizzled data");

			// Read-modify-write to avoid corrupting already resident memory outside texture region
			std::vector<u8> tmp_data(rsx_pitch * height);
			std::memcpy(tmp_data.data(), dst, tmp_data.size());

			switch (type)
			{
			case gl::texture::type::uint_8_8_8_8_rev:
			case gl::texture::type::uint_8_8_8_8:
			case gl::texture::type::uint_24_8:
				// 32-bit texels
				rsx::convert_linear_swizzle<u32, false>(tmp_data.data(), dst, width, height, rsx_pitch);
				break;
			case gl::texture::type::ushort_5_6_5:
			case gl::texture::type::ushort:
				// 16-bit texels
				rsx::convert_linear_swizzle<u16, false>(tmp_data.data(), dst, width, height, rsx_pitch);
				break;
			default:
				rsx_log.error("Unexpected swizzled texture format 0x%x", static_cast<u32>(format));
			}
		}
	}

	// Returns a view of a temporary image with the requested properties.
	// An unreferenced image from m_temporary_surfaces with a matching property key
	// is reused; otherwise a new one is created and appended to that list.
	//
	// sized_internal_fmt: GL_NONE selects the format derived from gcm_format.
	// x, y, width, height: region handed to the transfer when 'copy' is set.
	// copy: when true, the region is transferred from 'src' into the image.
	// remap: channel remap used to obtain the returned view.
	gl::texture_view* texture_cache::create_temporary_subresource_impl(gl::command_context& cmd, gl::texture* src, GLenum sized_internal_fmt, GLenum dst_target, u32 gcm_format, u16 x, u16 y, u16 width, u16 height, u16 depth, u8 mipmaps, const rsx::texture_channel_remap_t& remap, bool copy)
	{
		if (sized_internal_fmt == GL_NONE)
		{
			sized_internal_fmt = gl::get_sized_internal_format(gcm_format);
		}

		temporary_image_t* dst = nullptr;
		const auto match_key = encode_properties(sized_internal_fmt, dst_target, width, height, depth, mipmaps);

		// Search image cache
		for (auto& e : m_temporary_surfaces)
		{
			if (e->has_refs())
			{
				// Still referenced, cannot be recycled
				continue;
			}

			if (e->properties_encoding == match_key)
			{
				dst = e.get();
				break;
			}
		}

		if (!dst)
		{
			// Nothing reusable, create a new image and hand ownership to the cache
			std::unique_ptr<temporary_image_t> data = std::make_unique<temporary_image_t>(dst_target, width, height, depth, mipmaps, sized_internal_fmt, rsx::classify_format(gcm_format));
			dst = data.get();
			dst->properties_encoding = match_key;
			m_temporary_surfaces.emplace_back(std::move(data));
		}

		dst->add_ref();

		if (copy)
		{
			std::vector<copy_region_descriptor> region =
			{{
				src,
				rsx::surface_transform::coordinate_transform,
				0,
				x, y, 0, 0, 0,
				width, height, width, height
			}};

			copy_transfer_regions_impl(cmd, dst, region);
		}

		if (!src || static_cast<GLenum>(src->get_internal_format()) != sized_internal_fmt)
		{
			// Apply base component map onto the new texture if a data cast has been done
			auto components = get_component_mapping(gcm_format, rsx::component_order::default_);
			dst->set_native_component_layout(components);
		}

		const auto encoding = rsx::get_remap_encoding(remap);
		return dst->get_view(encoding, remap);
	}

	// Transfers every region in 'sources' into dst_image. Entries without a source are skipped.
	// Per entry:
	//  - sources that are not bitcast-compatible with dst_image go through a typeless copy,
	//  - equally sized regions are copied directly,
	//  - differently sized regions are scaled (2D destinations only).
	void texture_cache::copy_transfer_regions_impl(gl::command_context& cmd, gl::texture* dst_image, const std::vector<copy_region_descriptor>& sources) const
	{
		const auto dst_bpp = dst_image->pitch() / dst_image->width();
		const auto dst_aspect = dst_image->aspect();

		for (const auto& slice : sources)
		{
			if (!slice.src)
			{
				continue;
			}

			const bool typeless = !formats_are_bitcast_compatible(slice.src, dst_image);
			ensure(typeless || dst_aspect == slice.src->aspect());

			// Owns the typeless intermediate. 'src_image' may point at it until the end of this
			// iteration, so it must not be reassigned for any other purpose.
			std::unique_ptr<gl::texture> typeless_tmp;

			auto src_image = slice.src;
			auto src_x = slice.src_x;
			auto src_y = slice.src_y;
			auto src_w = slice.src_w;
			auto src_h = slice.src_h;

			if (slice.xform == rsx::surface_transform::coordinate_transform)
			{
				// Dimensions were given in 'dst' space. Work out the real source coordinates
				const auto src_bpp = slice.src->pitch() / slice.src->width();
				src_x = (src_x * dst_bpp) / src_bpp;
				src_w = utils::aligned_div<u16>(src_w * dst_bpp, src_bpp);
			}

			if (auto surface = dynamic_cast<gl::render_target*>(slice.src))
			{
				surface->transform_samples_to_pixels(src_x, src_w, src_y, src_h);
			}

			if (typeless) [[unlikely]]
			{
				const auto src_bpp = slice.src->pitch() / slice.src->width();

				// Compute src region in dst format layout
				const u16 src_w2 = u16(src_w * src_bpp) / dst_bpp;
				const u16 src_x2 = u16(src_x * src_bpp) / dst_bpp;

				if (src_w2 == slice.dst_w && src_h == slice.dst_h && slice.level == 0)
				{
					// Optimization, avoid typeless copy to tmp followed by data copy to dst
					// Combine the two transfers into one
					const coord3u src_region = { { src_x, src_y, 0 }, { src_w, src_h, 1 } };
					const coord3u dst_region = { { slice.dst_x, slice.dst_y, slice.dst_z }, { slice.dst_w, slice.dst_h, 1 } };
					gl::copy_typeless(cmd, dst_image, slice.src, dst_region, src_region);

					continue;
				}

				// The intermediate is only allocated once the combined transfer above is ruled out
				const u16 convert_w = u16(slice.src->width() * src_bpp) / dst_bpp;
				typeless_tmp = std::make_unique<gl::texture>(GL_TEXTURE_2D, convert_w, slice.src->height(), 1, 1, static_cast<GLenum>(dst_image->get_internal_format()), dst_image->format_class());

				src_image = typeless_tmp.get();

				const coord3u src_region = { { src_x, src_y, 0 }, { src_w, src_h, 1 } };
				const coord3u dst_region = { { src_x2, src_y, 0 }, { src_w2, src_h, 1 } };
				gl::copy_typeless(cmd, src_image, slice.src, dst_region, src_region);

				src_x = src_x2;
				src_w = src_w2;
			}

			if (src_w == slice.dst_w && src_h == slice.dst_h)
			{
				gl::g_hw_blitter->copy_image(cmd, src_image, dst_image, 0, slice.level,
					position3i{ src_x, src_y, 0 },
					position3i{ slice.dst_x, slice.dst_y, slice.dst_z },
					size3i{ src_w, src_h, 1 });
			}
			else
			{
				ensure(dst_image->get_target() == gl::texture::target::texture2D);

				auto _blitter = gl::g_hw_blitter;
				const areai src_rect = { src_x, src_y, src_x + src_w, src_y + src_h };
				const areai dst_rect = { slice.dst_x, slice.dst_y, slice.dst_x + slice.dst_w, slice.dst_y + slice.dst_h };

				// Separate owner for the scaling target. Sharing one owner with the typeless
				// intermediate would destroy the texture 'src_image' points at before it is read.
				std::unique_ptr<gl::texture> scaled_tmp;

				gl::texture* _dst;
				if (src_image->get_internal_format() == dst_image->get_internal_format() && slice.level == 0)
				{
					_dst = dst_image;
				}
				else
				{
					// NOTE(review): the format is taken from slice.src, not src_image; these differ
					// after a typeless conversion - confirm this is the intended format.
					scaled_tmp = std::make_unique<gl::texture>(GL_TEXTURE_2D, dst_rect.x2, dst_rect.y2, 1, 1, static_cast<GLenum>(slice.src->get_internal_format()));
					_dst = scaled_tmp.get();
				}

				_blitter->scale_image(cmd, src_image, _dst,
					src_rect, dst_rect, false, {});

				if (_dst != dst_image)
				{
					// Data cast comes after scaling
					gl::g_hw_blitter->copy_image(cmd, scaled_tmp.get(), dst_image, 0, slice.level,
						position3i{ slice.dst_x, slice.dst_y, 0 },
						position3i{ slice.dst_x, slice.dst_y, slice.dst_z },
						size3i{ slice.dst_w, slice.dst_h, 1 });
				}
			}
		}
	}
}