mirror of
https://github.com/RPCSX/rpcsx.git
synced 2026-01-20 07:30:24 +01:00
668 lines
22 KiB
C++
668 lines
22 KiB
C++
#include "stdafx.h"
|
|
#include "nv3089.h"
|
|
|
|
#include "Emu/RSX/RSXThread.h"
|
|
#include "Emu/RSX/Core/RSXReservationLock.hpp"
|
|
#include "Emu/RSX/Common/tiled_dma_copy.hpp"
|
|
|
|
#include "context_accessors.define.h"
|
|
|
|
namespace rsx
|
|
{
|
|
namespace nv3089
|
|
{
|
|
/**
 * Decodes the NV3089 (IMAGE_IN) transfer registers into source and destination blit
 * descriptors, validating the request and issuing the required read barriers on the way.
 *
 * @param ctx  RSX context whose registers are read via REGS(ctx) / RSX(ctx).
 * @return     { ok, src, dst }. When ok is false the caller must skip the transfer; the
 *             descriptors are then left default-initialised. This happens for empty clip
 *             regions, unknown operations/formats/surfaces, unresolvable addresses,
 *             move-to-self requests, zero-sized scaled output, or when copy_zcull_stats
 *             reports that all writes were deferred.
 * @throws     via fmt::throw_exception when the input region is empty or a color format
 *             is not supported by this path.
 */
static std::tuple<bool, blit_src_info, blit_dst_info> decode_transfer_registers(context* ctx)
{
	blit_src_info src_info = {};
	blit_dst_info dst_info = {};

	const rsx::blit_engine::transfer_operation operation = REGS(ctx)->blit_engine_operation();

	const u16 out_x = REGS(ctx)->blit_engine_output_x();
	const u16 out_y = REGS(ctx)->blit_engine_output_y();
	const u16 out_w = REGS(ctx)->blit_engine_output_width();
	const u16 out_h = REGS(ctx)->blit_engine_output_height();

	const u16 in_w = REGS(ctx)->blit_engine_input_width();
	const u16 in_h = REGS(ctx)->blit_engine_input_height();

	const blit_engine::transfer_origin in_origin = REGS(ctx)->blit_engine_input_origin();
	auto src_color_format = REGS(ctx)->blit_engine_src_color_format();

	const f32 scale_x = REGS(ctx)->blit_engine_ds_dx();
	const f32 scale_y = REGS(ctx)->blit_engine_dt_dy();

	// Clipping
	// Validate that clipping rect will fit onto both src and dst regions
	const u16 clip_w = std::min(REGS(ctx)->blit_engine_clip_width(), out_w);
	const u16 clip_h = std::min(REGS(ctx)->blit_engine_clip_height(), out_h);

	// Check both clip dimensions and dst dimensions
	if (clip_w == 0 || clip_h == 0)
	{
		rsx_log.warning("NV3089_IMAGE_IN: Operation NOPed out due to empty regions");
		return { false, src_info, dst_info };
	}

	if (in_w == 0 || in_h == 0)
	{
		// Input can't be an empty region
		fmt::throw_exception("NV3089_IMAGE_IN_SIZE: Invalid blit dimensions passed (in_w=%d, in_h=%d)", in_w, in_h);
	}

	u16 clip_x = REGS(ctx)->blit_engine_clip_x();
	u16 clip_y = REGS(ctx)->blit_engine_clip_y();

	// Fit onto dst: drop the clip offset if the clip rect would not fit in the output
	if (clip_x && (out_x + clip_x + clip_w) > out_w) clip_x = 0;
	if (clip_y && (out_y + clip_y + clip_h) > out_h) clip_y = 0;

	u16 in_pitch = REGS(ctx)->blit_engine_input_pitch();

	switch (in_origin)
	{
	case blit_engine::transfer_origin::corner:
	case blit_engine::transfer_origin::center:
		break;
	default:
		// Only logged; decoding continues with the unknown origin treated like 'corner' below
		rsx_log.warning("NV3089_IMAGE_IN_SIZE: unknown origin (%d)", static_cast<u8>(in_origin));
	}

	if (operation != rsx::blit_engine::transfer_operation::srccopy)
	{
		rsx_log.error("NV3089_IMAGE_IN_SIZE: unknown operation (0x%x)", REGS(ctx)->registers[NV3089_SET_OPERATION]);
		RSX(ctx)->recover_fifo();
		return { false, src_info, dst_info };
	}

	if (!src_color_format)
	{
		rsx_log.error("NV3089_IMAGE_IN_SIZE: unknown src color format (0x%x)", REGS(ctx)->registers[NV3089_SET_COLOR_FORMAT]);
		RSX(ctx)->recover_fifo();
		return { false, src_info, dst_info };
	}

	const u32 src_offset = REGS(ctx)->blit_engine_input_offset();
	const u32 src_dma = REGS(ctx)->blit_engine_input_location();

	// Destination parameters depend on which context surface is bound; every path through the
	// switch below either assigns dst_offset and dst_color_format or returns.
	u32 dst_offset;
	u32 dst_dma = 0;
	rsx::blit_engine::transfer_destination_format dst_color_format;
	u32 out_pitch = 0;
	[[maybe_unused]] u32 out_alignment = 64;
	bool is_block_transfer = false;

	switch (REGS(ctx)->blit_engine_context_surface())
	{
	case blit_engine::context_surface::surface2d:
	{
		dst_dma = REGS(ctx)->blit_engine_output_location_nv3062();
		dst_offset = REGS(ctx)->blit_engine_output_offset_nv3062();
		out_pitch = REGS(ctx)->blit_engine_output_pitch_nv3062();
		out_alignment = REGS(ctx)->blit_engine_output_alignment_nv3062();

		// Unscaled transfers are candidates for the plain block-copy path
		is_block_transfer = fcmp(scale_x, 1.f) && fcmp(scale_y, 1.f);

		if (auto dst_fmt = REGS(ctx)->blit_engine_nv3062_color_format(); !dst_fmt)
		{
			rsx_log.error("NV3089_IMAGE_IN_SIZE: unknown NV3062 dst color format (0x%x)", REGS(ctx)->registers[NV3062_SET_COLOR_FORMAT]);
			RSX(ctx)->recover_fifo();
			return { false, src_info, dst_info };
		}
		else
		{
			dst_color_format = dst_fmt;
		}

		break;
	}
	case blit_engine::context_surface::swizzle2d:
	{
		dst_dma = REGS(ctx)->blit_engine_nv309E_location();
		dst_offset = REGS(ctx)->blit_engine_nv309E_offset();

		if (auto dst_fmt = REGS(ctx)->blit_engine_output_format_nv309E(); !dst_fmt)
		{
			rsx_log.error("NV3089_IMAGE_IN_SIZE: unknown NV309E dst color format (0x%x)", REGS(ctx)->registers[NV309E_SET_FORMAT]);
			RSX(ctx)->recover_fifo();
			return { false, src_info, dst_info };
		}
		else
		{
			dst_color_format = dst_fmt;
		}

		break;
	}
	default:
		rsx_log.error("NV3089_IMAGE_IN_SIZE: unknown m_context_surface (0x%x)", static_cast<u8>(REGS(ctx)->blit_engine_context_surface()));
		return { false, src_info, dst_info };
	}

	const u32 in_bpp = (src_color_format == rsx::blit_engine::transfer_source_format::r5g6b5) ? 2 : 4; // bytes per pixel
	const u32 out_bpp = (dst_color_format == rsx::blit_engine::transfer_destination_format::r5g6b5) ? 2 : 4;

	// A pitch of zero means "tightly packed"
	if (out_pitch == 0)
	{
		out_pitch = out_bpp * out_w;
	}

	if (in_pitch == 0)
	{
		in_pitch = in_bpp * in_w;
	}

	// A format change can never be serviced by a raw block copy
	if (in_bpp != out_bpp)
	{
		is_block_transfer = false;
	}

	u16 in_x, in_y;
	if (in_origin == blit_engine::transfer_origin::center)
	{
		// Convert to normal u,v addressing. Under this scheme offset of 1 is actually half-way inside pixel 0
		const float x = std::max(REGS(ctx)->blit_engine_in_x(), 0.5f);
		const float y = std::max(REGS(ctx)->blit_engine_in_y(), 0.5f);
		in_x = static_cast<u16>(std::floor(x - 0.5f));
		in_y = static_cast<u16>(std::floor(y - 0.5f));
	}
	else
	{
		in_x = static_cast<u16>(std::floor(REGS(ctx)->blit_engine_in_x()));
		in_y = static_cast<u16>(std::floor(REGS(ctx)->blit_engine_in_y()));
	}

	// Check for subpixel addressing: snap the source origin so that it maps onto a whole destination pixel
	if (scale_x < 1.f)
	{
		float dst_x = in_x * scale_x;
		in_x = static_cast<u16>(std::floor(dst_x) / scale_x);
	}

	if (scale_y < 1.f)
	{
		float dst_y = in_y * scale_y;
		in_y = static_cast<u16>(std::floor(dst_y) / scale_y);
	}

	const u32 in_offset = in_x * in_bpp + in_pitch * in_y;
	const u32 out_offset = out_x * out_bpp + out_pitch * out_y;

	const u32 src_line_length = (in_w * in_bpp);

	u32 src_address = 0;
	const u32 dst_address = get_address(dst_offset, dst_dma, 1); // TODO: Add size

	if (is_block_transfer && (clip_h == 1 || (in_pitch == out_pitch && src_line_length == in_pitch)))
	{
		const u32 nb_lines = std::min(clip_h, in_h);
		const u32 data_length = nb_lines * src_line_length;

		if (src_address = get_address(src_offset, src_dma, data_length);
			!src_address || !dst_address)
		{
			RSX(ctx)->recover_fifo();
			return { false, src_info, dst_info };
		}

		RSX(ctx)->invalidate_fragment_program(dst_dma, dst_offset, data_length);

		if (const auto result = RSX(ctx)->read_barrier(src_address, data_length, false);
			result == rsx::result_zcull_intr)
		{
			if (RSX(ctx)->copy_zcull_stats(src_address, data_length, dst_address) == data_length)
			{
				// All writes deferred
				return { false, src_info, dst_info };
			}
		}
	}
	else
	{
		// Number of source rows covered by the read barrier.
		// The quotient is only converted to u16 when it is known to be representable:
		//  - a negative (mirrored), zero or NaN scale falls back to the whole source height,
		//    because converting a negative or non-finite float to an unsigned type is undefined;
		//  - a quotient that does not fit below in_h is capped at in_h;
		//  - at least one row is always read, otherwise (read_h - 1) below would wrap.
		u16 read_h = in_h;
		if (scale_y > 0.f)
		{
			const f32 wanted_rows = static_cast<f32>(clip_h) / scale_y;
			if (wanted_rows < static_cast<f32>(in_h))
			{
				read_h = std::max<u16>(static_cast<u16>(wanted_rows), 1);
			}
		}

		const u32 data_length = in_pitch * (read_h - 1) + src_line_length;

		if (src_address = get_address(src_offset, src_dma, data_length);
			!src_address || !dst_address)
		{
			RSX(ctx)->recover_fifo();
			return { false, src_info, dst_info };
		}

		RSX(ctx)->invalidate_fragment_program(dst_dma, dst_offset, data_length);
		RSX(ctx)->read_barrier(src_address, data_length, true);
	}

	if (src_address == dst_address &&
		in_w == clip_w && in_h == clip_h &&
		in_pitch == out_pitch &&
		rsx::fcmp(scale_x, 1.f) && rsx::fcmp(scale_y, 1.f))
	{
		// NULL operation
		rsx_log.warning("NV3089_IMAGE_IN: Operation writes memory onto itself with no modification (move-to-self). Will ignore.");
		return { false, src_info, dst_info };
	}

	u8* pixels_src = vm::_ptr<u8>(src_address + in_offset);
	u8* pixels_dst = vm::_ptr<u8>(dst_address + out_offset);

	if (dst_color_format != rsx::blit_engine::transfer_destination_format::r5g6b5 &&
		dst_color_format != rsx::blit_engine::transfer_destination_format::a8r8g8b8)
	{
		fmt::throw_exception("NV3089_IMAGE_IN_SIZE: unknown dst_color_format (%d)", static_cast<u8>(dst_color_format));
	}

	if (src_color_format != rsx::blit_engine::transfer_source_format::r5g6b5 &&
		src_color_format != rsx::blit_engine::transfer_source_format::a8r8g8b8)
	{
		// Alpha has no meaning in both formats
		if (src_color_format == rsx::blit_engine::transfer_source_format::x8r8g8b8)
		{
			src_color_format = rsx::blit_engine::transfer_source_format::a8r8g8b8;
		}
		else
		{
			// TODO: Support more formats
			fmt::throw_exception("NV3089_IMAGE_IN_SIZE: unknown src_color_format (%d)", static_cast<u8>(*src_color_format));
		}
	}

	// Size of the full source image after scaling (sign of the scale only encodes mirroring)
	u32 convert_w = static_cast<u32>(std::abs(scale_x) * in_w);
	u32 convert_h = static_cast<u32>(std::abs(scale_y) * in_h);

	if (convert_w == 0 || convert_h == 0)
	{
		rsx_log.error("NV3089_IMAGE_IN: Invalid dimensions or scaling factor. Request ignored (ds_dx=%f, dt_dy=%f)",
			REGS(ctx)->blit_engine_ds_dx(), REGS(ctx)->blit_engine_dt_dy());
		return { false, src_info, dst_info };
	}

	src_info.format = src_color_format;
	src_info.origin = in_origin;
	src_info.width = in_w;
	src_info.height = in_h;
	src_info.pitch = in_pitch;
	src_info.bpp = in_bpp;
	src_info.offset_x = in_x;
	src_info.offset_y = in_y;
	src_info.dma = src_dma;
	src_info.rsx_address = src_address;
	src_info.pixels = pixels_src;

	dst_info.format = dst_color_format;
	dst_info.width = convert_w;
	dst_info.height = convert_h;
	dst_info.clip_x = clip_x;
	dst_info.clip_y = clip_y;
	dst_info.clip_width = clip_w;
	dst_info.clip_height = clip_h;
	dst_info.offset_x = out_x;
	dst_info.offset_y = out_y;
	dst_info.pitch = out_pitch;
	dst_info.bpp = out_bpp;
	dst_info.scale_x = scale_x;
	dst_info.scale_y = scale_y;
	dst_info.dma = dst_dma;
	dst_info.rsx_address = dst_address;
	dst_info.pixels = pixels_dst;
	dst_info.swizzled = (REGS(ctx)->blit_engine_context_surface() == blit_engine::context_surface::swizzle2d);

	return { true, src_info, dst_info };
}
|
|
|
|
void linear_copy(
|
|
const blit_dst_info& dst,
|
|
const blit_src_info& src,
|
|
u16 out_w,
|
|
u16 out_h,
|
|
u32 slice_h,
|
|
AVPixelFormat ffmpeg_src_format,
|
|
AVPixelFormat ffmpeg_dst_format,
|
|
bool need_convert,
|
|
bool need_clip,
|
|
bool src_is_modified,
|
|
bool interpolate)
|
|
{
|
|
std::vector<u8> temp2;
|
|
|
|
if (!need_convert) [[ likely ]]
|
|
{
|
|
const bool is_overlapping = !src_is_modified && dst.dma == src.dma && [&]() -> bool
|
|
{
|
|
const auto src_range = utils::address_range::start_length(src.rsx_address, src.pitch * (src.height - 1) + (src.bpp * src.width));
|
|
const auto dst_range = utils::address_range::start_length(dst.rsx_address, dst.pitch * (dst.clip_height - 1) + (dst.bpp * dst.clip_width));
|
|
return src_range.overlaps(dst_range);
|
|
}();
|
|
|
|
if (is_overlapping) [[ unlikely ]]
|
|
{
|
|
if (need_clip)
|
|
{
|
|
temp2.resize(dst.pitch * dst.clip_height);
|
|
clip_image_may_overlap(dst.pixels, src.pixels, dst.clip_x, dst.clip_y, dst.clip_width, dst.clip_height, dst.bpp, src.pitch, dst.pitch, temp2.data());
|
|
return;
|
|
}
|
|
|
|
if (dst.pitch != src.pitch || dst.pitch != dst.bpp * out_w)
|
|
{
|
|
const u32 buffer_pitch = dst.bpp * out_w;
|
|
temp2.resize(buffer_pitch * out_h);
|
|
std::add_pointer_t<u8> buf = temp2.data(), pixels = src.pixels;
|
|
|
|
// Read the whole buffer from source
|
|
for (u32 y = 0; y < out_h; ++y)
|
|
{
|
|
std::memcpy(buf, pixels, buffer_pitch);
|
|
pixels += src.pitch;
|
|
buf += buffer_pitch;
|
|
}
|
|
|
|
buf = temp2.data(), pixels = dst.pixels;
|
|
|
|
// Write to destination
|
|
for (u32 y = 0; y < out_h; ++y)
|
|
{
|
|
std::memcpy(pixels, buf, buffer_pitch);
|
|
pixels += dst.pitch;
|
|
buf += buffer_pitch;
|
|
}
|
|
|
|
return;
|
|
}
|
|
|
|
std::memmove(dst.pixels, src.pixels, dst.pitch * out_h);
|
|
return;
|
|
}
|
|
|
|
if (need_clip) [[ unlikely ]]
|
|
{
|
|
clip_image(dst.pixels, src.pixels, dst.clip_x, dst.clip_y, dst.clip_width, dst.clip_height, dst.bpp, src.pitch, dst.pitch);
|
|
return;
|
|
}
|
|
|
|
if (dst.pitch != src.pitch || dst.pitch != dst.bpp * out_w) [[ unlikely ]]
|
|
{
|
|
u8* dst_pixels = dst.pixels, * src_pixels = src.pixels;
|
|
|
|
for (u32 y = 0; y < out_h; ++y)
|
|
{
|
|
std::memcpy(dst_pixels, src_pixels, out_w * dst.bpp);
|
|
dst_pixels += dst.pitch;
|
|
src_pixels += src.pitch;
|
|
}
|
|
|
|
return;
|
|
}
|
|
|
|
std::memcpy(dst.pixels, src.pixels, dst.pitch * out_h);
|
|
return;
|
|
}
|
|
|
|
if (need_clip) [[ unlikely ]]
|
|
{
|
|
temp2.resize(dst.pitch * std::max<u32>(dst.height, dst.clip_height));
|
|
|
|
convert_scale_image(temp2.data(), ffmpeg_dst_format, dst.width, dst.height, dst.pitch,
|
|
src.pixels, ffmpeg_src_format, src.width, src.height, src.pitch, slice_h, interpolate);
|
|
|
|
clip_image(dst.pixels, temp2.data(), dst.clip_x, dst.clip_y, dst.clip_width, dst.clip_height, dst.bpp, dst.pitch, dst.pitch);
|
|
return;
|
|
}
|
|
|
|
convert_scale_image(dst.pixels, ffmpeg_dst_format, out_w, out_h, dst.pitch,
|
|
src.pixels, ffmpeg_src_format, src.width, src.height, src.pitch, slice_h,
|
|
interpolate);
|
|
}
|
|
|
|
std::vector<u8> swizzled_copy_1(
|
|
const blit_dst_info& dst,
|
|
const blit_src_info& src,
|
|
u16 out_w,
|
|
u16 out_h,
|
|
u32 slice_h,
|
|
AVPixelFormat ffmpeg_src_format,
|
|
AVPixelFormat ffmpeg_dst_format,
|
|
bool need_convert,
|
|
bool need_clip,
|
|
bool interpolate)
|
|
{
|
|
std::vector<u8> temp2, temp3;
|
|
|
|
if (need_clip)
|
|
{
|
|
temp3.resize(dst.pitch * dst.clip_height);
|
|
|
|
if (need_convert)
|
|
{
|
|
temp2.resize(dst.pitch * std::max<u32>(dst.height, dst.clip_height));
|
|
|
|
convert_scale_image(temp2.data(), ffmpeg_dst_format, dst.width, dst.height, dst.pitch,
|
|
src.pixels, ffmpeg_src_format, src.width, src.height, src.pitch, slice_h,
|
|
interpolate);
|
|
|
|
clip_image(temp3.data(), temp2.data(), dst.clip_x, dst.clip_y, dst.clip_width, dst.clip_height, dst.bpp, dst.pitch, dst.pitch);
|
|
return temp3;
|
|
}
|
|
|
|
clip_image(temp3.data(), src.pixels, dst.clip_x, dst.clip_y, dst.clip_width, dst.clip_height, dst.bpp, src.pitch, dst.pitch);
|
|
return temp3;
|
|
}
|
|
|
|
if (need_convert)
|
|
{
|
|
temp3.resize(dst.pitch * out_h);
|
|
|
|
convert_scale_image(temp3.data(), ffmpeg_dst_format, out_w, out_h, dst.pitch,
|
|
src.pixels, ffmpeg_src_format, src.width, src.height, src.pitch, slice_h,
|
|
interpolate);
|
|
|
|
return temp3;
|
|
}
|
|
|
|
return {};
|
|
}
|
|
|
|
/**
 * Second stage of a swizzled transfer: pads the linear image up to power-of-two dimensions
 * when necessary and writes it to the destination via convert_linear_swizzle.
 *
 * @param linear_pixels    Linear input image.
 * @param swizzled_pixels  Destination for the swizzled output.
 * @param linear_pitch     Pitch forwarded to convert_linear_swizzle.
 * @param out_w, out_h     Output size in pixels.
 * @param out_bpp          Bytes per pixel; only 1, 2 and 4 are handled, other values do nothing.
 */
void swizzled_copy_2(
	u8* linear_pixels,
	u8* swizzled_pixels,
	u32 linear_pitch,
	u16 out_w,
	u16 out_h,
	u8 out_bpp)
{
	// TODO: Validate these claims. Are the registers always correctly initialized? Should we trust them at all?
	// It looks like rsx may ignore the requested swizzle size and just always
	// round up to nearest power of 2
	/*
	u8 sw_width_log2 = REGS(ctx)->nv309e_sw_width_log2();
	u8 sw_height_log2 = REGS(ctx)->nv309e_sw_height_log2();

	// 0 indicates height of 1 pixel
	sw_height_log2 = sw_height_log2 == 0 ? 1 : sw_height_log2;

	// swizzle based on destination size
	u16 sw_width = 1 << sw_width_log2;
	u16 sw_height = 1 << sw_height_log2;
	*/

	const u32 sw_width = next_pow2(out_w);
	const u32 sw_height = next_pow2(out_h);

	// Pad the texture out if we are given non power of 2 output
	const bool needs_padding = (sw_width != out_w) || (sw_height != out_h);

	std::vector<u8> padded;
	if (needs_padding)
	{
		padded.resize(out_bpp * sw_width * sw_height);
	}

	// Runs pad + swizzle for one texel type; the argument is only a type tag
	const auto swizzle_as = [&](auto texel_tag)
	{
		using texel_t = decltype(texel_tag);

		if (needs_padding)
		{
			pad_texture<texel_t>(linear_pixels, padded.data(), out_w, out_h, sw_width, sw_height);
			linear_pixels = padded.data();
		}

		// NOTE(review): after padding the input is the packed 'padded' buffer, yet the
		// caller-supplied linear_pitch is still passed on - confirm this is what
		// convert_linear_swizzle expects.
		convert_linear_swizzle<texel_t, false>(linear_pixels, swizzled_pixels, sw_width, sw_height, linear_pitch);
	};

	switch (out_bpp)
	{
	case 1:
		swizzle_as(u8{});
		break;
	case 2:
		swizzle_as(u16{});
		break;
	case 4:
		swizzle_as(u32{});
		break;
	}
}
|
|
|
|
std::vector<u8> _mirror_transform(const blit_src_info& src, bool flip_x, bool flip_y)
|
|
{
|
|
std::vector<u8> temp1;
|
|
if (!flip_x && !flip_y)
|
|
{
|
|
return temp1;
|
|
}
|
|
|
|
const u32 packed_pitch = src.width * src.bpp;
|
|
temp1.resize(packed_pitch * src.height);
|
|
|
|
const s32 stride_y = (flip_y ? -1 : 1) * static_cast<s32>(src.pitch);
|
|
|
|
for (u32 y = 0; y < src.height; ++y)
|
|
{
|
|
u8* dst_pixels = temp1.data() + (packed_pitch * y);
|
|
u8* src_pixels = src.pixels + (static_cast<s32>(y) * stride_y);
|
|
|
|
if (flip_x)
|
|
{
|
|
if (src.bpp == 4) [[ likely ]]
|
|
{
|
|
rsx::memcpy_r<u32>(dst_pixels, src_pixels, src.width);
|
|
continue;
|
|
}
|
|
|
|
rsx::memcpy_r<u16>(dst_pixels, src_pixels, src.width);
|
|
continue;
|
|
}
|
|
|
|
std::memcpy(dst_pixels, src_pixels, packed_pitch);
|
|
}
|
|
|
|
return temp1;
|
|
}
|
|
|
|
/**
 * NV3089 IMAGE_IN handler: performs a scaled image transfer from memory.
 *
 * Decodes the transfer registers, takes a reservation lock over the source and destination
 * ranges, and then either hands the blit to RSX(ctx)->scaled_image_from_memory or performs it
 * on the CPU (mirroring, clipping, conversion/scaling, optional swizzling and tiling).
 *
 * @param ctx  RSX context; the register and argument values passed alongside are unused.
 */
void image_in(context* ctx, u32 /*reg*/, u32 /*arg*/)
{
	auto [success, src, dst] = decode_transfer_registers(ctx);
	if (!success)
	{
		return;
	}

	// Decode extra params before locking
	const blit_engine::transfer_interpolator in_inter = REGS(ctx)->blit_engine_input_inter();
	const u16 out_w = REGS(ctx)->blit_engine_output_width();
	const u16 out_h = REGS(ctx)->blit_engine_output_height();

	// Lock here. RSX cannot execute any locking operations from this point, including ZCULL read barriers
	auto res = ::rsx::reservation_lock<true>(
		dst.rsx_address, dst.pitch * dst.clip_height,
		src.rsx_address, src.pitch * src.height);

	if (!g_cfg.video.force_cpu_blit_processing &&
		(dst.dma == CELL_GCM_CONTEXT_DMA_MEMORY_FRAME_BUFFER || src.dma == CELL_GCM_CONTEXT_DMA_MEMORY_FRAME_BUFFER) &&
		RSX(ctx)->scaled_image_from_memory(src, dst, in_inter == blit_engine::transfer_interpolator::foh))
	{
		// HW-accelerated blit
		return;
	}

	std::vector<u8> mirror_tmp;
	bool src_is_temp = false;

	// Flip source if needed. Afterwards src refers to a tightly packed temporary copy.
	if (dst.scale_y < 0 || dst.scale_x < 0)
	{
		mirror_tmp = _mirror_transform(src, dst.scale_x < 0, dst.scale_y < 0);
		src.pixels = mirror_tmp.data();
		src.pitch = src.width * src.bpp;
		src_is_temp = true;
	}

	const AVPixelFormat in_format = (src.format == rsx::blit_engine::transfer_source_format::r5g6b5) ? AV_PIX_FMT_RGB565BE : AV_PIX_FMT_ARGB;
	const AVPixelFormat out_format = (dst.format == rsx::blit_engine::transfer_destination_format::r5g6b5) ? AV_PIX_FMT_RGB565BE : AV_PIX_FMT_ARGB;

	const bool need_clip =
		dst.clip_width != src.width ||
		dst.clip_height != src.height ||
		dst.clip_x > 0 || dst.clip_y > 0 ||
		dst.width != out_w || dst.height != out_h;

	const bool need_convert = out_format != in_format || !rsx::fcmp(fabsf(dst.scale_x), 1.f) || !rsx::fcmp(fabsf(dst.scale_y), 1.f);

	// Number of source rows needed to cover the clipped output.
	// Only the magnitude of the scale matters here: mirroring was already applied to the source
	// above, and dividing by a negative scale would produce a negative value whose conversion
	// to u32 is undefined.
	const u32 slice_h = static_cast<u32>(std::ceil(static_cast<f32>(dst.clip_height + dst.clip_y) / fabsf(dst.scale_y)));
	const bool interpolate = in_inter == blit_engine::transfer_interpolator::foh;

	// If the destination lies in tiled memory, render into a linear temp first and tile it at the end
	auto real_dst = dst.pixels;
	const auto tiled_region = RSX(ctx)->get_tiled_memory_region(utils::address_range::start_length(dst.rsx_address, dst.pitch * dst.clip_height));
	std::vector<u8> tmp;

	if (tiled_region)
	{
		// real_dst already holds the original destination pointer
		tmp.resize(tiled_region.tile->size);
		dst.pixels = tmp.data();
	}

	if (REGS(ctx)->blit_engine_context_surface() != blit_engine::context_surface::swizzle2d)
	{
		linear_copy(dst, src, out_w, out_h, slice_h, in_format, out_format, need_convert, need_clip, src_is_temp, interpolate);
	}
	else
	{
		// Swizzle_copy_1 prepares usable output buffer from our original source. It mostly deals with cropping and scaling the input pixels so that the final swizzle does not need to apply that.
		const auto swz_temp = swizzled_copy_1(dst, src, out_w, out_h, slice_h, in_format, out_format, need_convert, need_clip, interpolate);
		const u8* pixels_src = src.pixels;
		auto src_pitch = src.pitch;

		// NOTE: Swizzled copy routine creates temp output buffer that uses dst pitch, not source pitch. We need to account for this if using that output as intermediary buffer.
		if (!swz_temp.empty())
		{
			pixels_src = swz_temp.data();
			src_pitch = dst.pitch;
		}

		// Swizzle_copy_2 only pads the data and encodes it as a swizzled output. Transformation (scaling, rotation, etc) is done in swizzle_copy_1
		swizzled_copy_2(const_cast<u8*>(pixels_src), dst.pixels, src_pitch, out_w, out_h, dst.bpp);
	}

	if (tiled_region)
	{
		// Encode the linear temp into the real (tiled) destination
		const auto tile_func = dst.bpp == 4
			? rsx::tile_texel_data32
			: rsx::tile_texel_data16;

		tile_func(
			real_dst,
			dst.pixels,
			tiled_region.base_address,
			dst.rsx_address - tiled_region.base_address,
			tiled_region.tile->size,
			tiled_region.tile->bank,
			tiled_region.tile->pitch,
			dst.clip_width,
			dst.clip_height
		);
	}
}
|
|
}
|
|
}
|