From 768b4f8c65553c041ed163e018a6db26f0694976 Mon Sep 17 00:00:00 2001 From: Eladash Date: Tue, 24 Mar 2020 11:47:13 +0200 Subject: [PATCH] rsx: Improve NV308A_COLOR * Fix NV308A_COLOR methods range. * Batch NV308A_COLOR methods execution together. * Fix termination of bind_range<> in rsx methods binding. --- rpcs3/Emu/RSX/RSXFIFO.cpp | 37 ++++++++ rpcs3/Emu/RSX/RSXFIFO.h | 3 + rpcs3/Emu/RSX/RSXThread.h | 2 + rpcs3/Emu/RSX/rsx_methods.cpp | 157 ++++++++++++++++++++++------------ 4 files changed, 146 insertions(+), 53 deletions(-) diff --git a/rpcs3/Emu/RSX/RSXFIFO.cpp b/rpcs3/Emu/RSX/RSXFIFO.cpp index 46cf34669b..1190b44aff 100644 --- a/rpcs3/Emu/RSX/RSXFIFO.cpp +++ b/rpcs3/Emu/RSX/RSXFIFO.cpp @@ -88,6 +88,25 @@ namespace rsx return false; } + // Optimization for methods which can be batched together + // Beware, can be easily misused + bool FIFO_control::skip_methods(u32 count) + { + if (m_remaining_commands > count) + { + m_command_reg += m_command_inc * count; + m_args_ptr += 4 * count; + m_remaining_commands -= count; + m_internal_get += 4 * count; + + return true; + } + + m_internal_get += 4 * m_remaining_commands; + m_remaining_commands = 0; + return false; + } + void FIFO_control::abort() { m_remaining_commands = 0; @@ -517,8 +536,26 @@ namespace rsx capture::capture_buffer_notify(this, it); break; default: + { + // Use legacy logic for NV308A_COLOR - enqueue leading command with count + // Then enqueue each command arg alone with a no-op command + if (reg >= NV308A_COLOR && reg < NV308A_COLOR + 0x700) + { + const u32 remaining = std::min(fifo_ctrl->get_remaining_args_count(), (NV308A_COLOR + 0x700) - reg); + + it.rsx_command.first = (fifo_ctrl->last_cmd() & RSX_METHOD_NON_INCREMENT_CMD_MASK) | (reg << 2) | (remaining << 18); + + for (u32 i = 0; i < remaining && fifo_ctrl->get_pos() + (i + 1) * 4 != (ctrl->put & ~3); i++) + { + replay_cmd.rsx_command = std::make_pair(0, vm::read32(fifo_ctrl->get_current_arg_ptr() + (i + 1) * 4)); + + frame_capture.replay_commands.push_back(replay_cmd); + } + } + break; } + } } } diff --git a/rpcs3/Emu/RSX/RSXFIFO.h b/rpcs3/Emu/RSX/RSXFIFO.h index 3842e7e28d..80571b9d38 100644 --- a/rpcs3/Emu/RSX/RSXFIFO.h +++ b/rpcs3/Emu/RSX/RSXFIFO.h @@ -132,6 +132,8 @@ namespace rsx u32 get_pos() const { return m_internal_get; } u32 last_cmd() const { return m_cmd; } void sync_get() { m_ctrl->get.release(m_internal_get); } + u32 get_current_arg_ptr() const { return m_args_ptr; }; + u32 get_remaining_args_count() const { return m_remaining_commands; } void inc_get(bool wait); void set_get(u32 get); void abort(); @@ -139,6 +141,7 @@ namespace rsx void read(register_pair& data); inline bool read_unsafe(register_pair& data); + bool skip_methods(u32 count); }; } } \ No newline at end of file diff --git a/rpcs3/Emu/RSX/RSXThread.h b/rpcs3/Emu/RSX/RSXThread.h index f0871d63f6..bef680aa83 100644 --- a/rpcs3/Emu/RSX/RSXThread.h +++ b/rpcs3/Emu/RSX/RSXThread.h @@ -588,7 +588,9 @@ namespace rsx backend_configuration backend_config{}; // FIFO + public: std::unique_ptr fifo_ctrl; + protected: FIFO::flattening_helper m_flattener; u32 fifo_ret_addr = RSX_CALL_STACK_EMPTY; u32 saved_fifo_ret = RSX_CALL_STACK_EMPTY; diff --git a/rpcs3/Emu/RSX/rsx_methods.cpp b/rpcs3/Emu/RSX/rsx_methods.cpp index e217a2472c..58c4c8fafd 100644 --- a/rpcs3/Emu/RSX/rsx_methods.cpp +++ b/rpcs3/Emu/RSX/rsx_methods.cpp @@ -819,36 +819,93 @@ namespace rsx template struct color { - static void impl(thread* rsx, u32 _reg, u32 arg) + static void impl(thread* rsx, u32 /*_reg*/, u32 /*arg*/) { - if (index >= method_registers.nv308a_size_out_x()) + const u32 out_x_max = method_registers.nv308a_size_out_x(); + + if (index >= out_x_max) { // Skip return; } - u32 color = arg; - u32 write_len = 4; + // Get position of the current command arg + const u32 src_offset = rsx->fifo_ctrl->get_pos() - 4; + + // Get real args count (starting from NV3089_COLOR) + const u32 count = std::min({rsx->fifo_ctrl->get_remaining_args_count() + 1, + static_cast(((rsx->ctrl->put & ~3ull) - src_offset) / 4), 0x700 - index, out_x_max - index}); + + const u32 dst_dma = method_registers.blit_engine_output_location_nv3062(); + const u32 dst_offset = method_registers.blit_engine_output_offset_nv3062(); + const u32 out_pitch = method_registers.blit_engine_output_pitch_nv3062(); + + const u32 x = method_registers.nv308a_x() + index; + const u32 y = method_registers.nv308a_y(); + + // TODO + //auto res = vm::passive_lock(address, address + write_len); + switch (method_registers.blit_engine_nv3062_color_format()) { case blit_engine::transfer_destination_format::a8r8g8b8: case blit_engine::transfer_destination_format::y32: { - // Bit cast + // Bit cast - optimize to mem copy + + const auto dst = vm::_ptr(get_address(dst_offset + (x * 4) + (out_pitch * y), dst_dma, HERE)); + const auto src = vm::_ptr(get_address(src_offset, CELL_GCM_LOCATION_MAIN, HERE)); + + const u32 data_length = count * 4; + + if (rsx->fifo_ctrl->last_cmd() & RSX_METHOD_NON_INCREMENT_CMD_MASK) [[unlikely]] + { + // Move last 32 bits + reinterpret_cast(dst)[0] = reinterpret_cast(src)[count - 1]; + } + else if (dst_dma & CELL_GCM_LOCATION_MAIN) + { + // May overlap + std::memmove(dst, src, data_length); + } + else + { + // Never overlaps + std::memcpy(dst, src, data_length); + } + break; } case blit_engine::transfer_destination_format::r5g6b5: { - // Input is considered to be ARGB8 - u32 r = (arg >> 16) & 0xFF; - u32 g = (arg >> 8) & 0xFF; - u32 b = arg & 0xFF; + const auto dst = vm::_ptr(get_address(dst_offset + (x * 2) + (y * out_pitch), dst_dma, HERE)); + const auto src = vm::_ptr(get_address(src_offset, CELL_GCM_LOCATION_MAIN, HERE)); + + auto convert = [](u32 input) -> u16 + { + // Input is considered to be ARGB8 + u32 r = (input >> 16) & 0xFF; + u32 g = (input >> 8) & 0xFF; + u32 b = input & 0xFF; + + r = (r * 32) / 255; + g = (g * 64) / 255; + b = (b * 32) / 255; + return static_cast((r << 11) | (g << 5) | b); + }; + + if (rsx->fifo_ctrl->last_cmd() & RSX_METHOD_NON_INCREMENT_CMD_MASK) [[unlikely]] + { + // Move last 16 bits + dst[0] = convert(src[count - 1]); + break; + } + + for (u32 i = 0; i < count; i++) + { + dst[i] = convert(src[i]); + } - r = u32(r * 32 / 255.f); - g = u32(g * 64 / 255.f); - b = u32(b * 32 / 255.f); - color = (r << 11) | (g << 5) | b; - write_len = 2; break; } default: @@ -857,27 +914,16 @@ namespace rsx } } - const u16 x = method_registers.nv308a_x(); - const u16 y = method_registers.nv308a_y(); - const u32 pixel_offset = (method_registers.blit_engine_output_pitch_nv3062() * y) + (x * write_len); - u32 address = get_address(method_registers.blit_engine_output_offset_nv3062() + pixel_offset + (index * write_len), method_registers.blit_engine_output_location_nv3062(), HERE); - - //auto res = vm::passive_lock(address, address + write_len); - - switch (write_len) - { - case 4: - vm::write32(address, color); - break; - case 2: - vm::write16(address, static_cast(color)); - break; - default: - fmt::throw_exception("Unreachable" HERE); - } - //res->release(0); - rsx->m_graphics_state |= rsx::pipeline_state::fragment_program_dirty; + + if (!(dst_dma & CELL_GCM_LOCATION_MAIN)) + { + // Set this flag on LOCAL memory transfer + rsx->m_graphics_state |= rsx::pipeline_state::fragment_program_dirty; + } + + // Skip "handled methods" + rsx->fifo_ctrl->skip_methods(count - 1); } }; } @@ -2518,40 +2564,42 @@ namespace rsx namespace method_detail { - template class T, int Index = 0> + template class T, u32 Index = 0> struct bind_range_impl_t { static inline void impl() { methods[Id] = &T::impl; - bind_range_impl_t::impl(); + + if constexpr (Count > 1) + { + bind_range_impl_t::impl(); + } } }; - template class T> - struct bind_range_impl_t - { - static inline void impl() - { - } - }; - - template class T, int Index = 0> + template class T, u32 Index = 0> static inline void bind_range() { + static_assert(Step && Count && Id + u64{Step} * (Count - 1) < 0x10000 / 4); + bind_range_impl_t::impl(); } - template + template static void bind() { + static_assert(Id < 0x10000 / 4); + methods[Id] = Func; } - template + template static void bind_array() { - for (int i = Id; i < Id + Count * Step; i += Step) + static_assert(Step && Count && Id + u64{Step} * (Count - 1) < 0x10000 / 4); + + for (u32 i = Id; i < Id + Count * Step; i += Step) { methods[i] = Func; } @@ -2917,8 +2965,6 @@ namespace rsx // Unknown (NV4097?) bind<(0x171c >> 2), trace_method>(); - bind_array<(0xac00 >> 2), 1, 16, trace_method>(); // Unknown texture control register - bind_array<(0xac40 >> 2), 1, 16, trace_method>(); // NV406E bind(); @@ -3025,9 +3071,14 @@ namespace rsx bind(); bind(); - //NV308A - bind_range(); - bind_range(); + //NV308A (0xa400..0xbffc!) + bind_range(); + bind_range(); + bind_range(); + bind_range(); + bind_range(); + bind_range(); + bind_range(); //NV3089 bind();