mirror of
https://github.com/RPCS3/rpcs3.git
synced 2026-05-07 13:37:46 +00:00
Performance improvements and CR fixes
This commit is contained in:
parent
15330a86ee
commit
1b63bf130c
14 changed files with 195 additions and 116 deletions
|
|
@ -176,14 +176,16 @@ void mfc_thread::cpu_task()
|
|||
|
||||
data = to_write;
|
||||
vm::reservation_update(cmd.eal, 128);
|
||||
vm::notify(cmd.eal, 128);
|
||||
_xend();
|
||||
vm::notify(cmd.eal, 128);
|
||||
}
|
||||
else
|
||||
{
|
||||
vm::writer_lock lock(0);
|
||||
data = to_write;
|
||||
vm::reservation_update(cmd.eal, 128);
|
||||
{
|
||||
vm::writer_lock lock(0);
|
||||
data = to_write;
|
||||
vm::reservation_update(cmd.eal, 128);
|
||||
}
|
||||
vm::notify(cmd.eal, 128);
|
||||
}
|
||||
}
|
||||
|
|
@ -356,7 +358,6 @@ void mfc_thread::cpu_task()
|
|||
}
|
||||
else
|
||||
{
|
||||
vm::reader_lock lock;
|
||||
vm::notify_all();
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -967,14 +967,18 @@ extern bool ppu_stwcx(ppu_thread& ppu, u32 addr, u32 reg_value)
|
|||
return result;
|
||||
}
|
||||
|
||||
vm::writer_lock lock(0);
|
||||
|
||||
const bool result = ppu.rtime == vm::reservation_acquire(addr, sizeof(u32)) && data.compare_and_swap_test(static_cast<u32>(ppu.rdata), reg_value);
|
||||
|
||||
if (result)
|
||||
bool result;
|
||||
{
|
||||
vm::reservation_update(addr, sizeof(u32));
|
||||
vm::notify(addr, sizeof(u32));
|
||||
vm::writer_lock lock(0);
|
||||
|
||||
result = ppu.rtime == vm::reservation_acquire(addr, sizeof(u32)) && data.compare_and_swap_test(static_cast<u32>(ppu.rdata), reg_value);
|
||||
|
||||
if (result)
|
||||
{
|
||||
vm::reservation_update(addr, sizeof(u32));
|
||||
lock.unlock();
|
||||
vm::notify(addr, sizeof(u32));
|
||||
}
|
||||
}
|
||||
|
||||
ppu.raddr = 0;
|
||||
|
|
@ -1011,14 +1015,18 @@ extern bool ppu_stdcx(ppu_thread& ppu, u32 addr, u64 reg_value)
|
|||
return result;
|
||||
}
|
||||
|
||||
vm::writer_lock lock(0);
|
||||
|
||||
const bool result = ppu.rtime == vm::reservation_acquire(addr, sizeof(u64)) && data.compare_and_swap_test(ppu.rdata, reg_value);
|
||||
|
||||
if (result)
|
||||
bool result;
|
||||
{
|
||||
vm::reservation_update(addr, sizeof(u64));
|
||||
vm::notify(addr, sizeof(u64));
|
||||
vm::writer_lock lock(0);
|
||||
|
||||
result = ppu.rtime == vm::reservation_acquire(addr, sizeof(u64)) && data.compare_and_swap_test(ppu.rdata, reg_value);
|
||||
|
||||
if (result)
|
||||
{
|
||||
vm::reservation_update(addr, sizeof(u64));
|
||||
lock.unlock();
|
||||
vm::notify(addr, sizeof(u64));
|
||||
}
|
||||
}
|
||||
|
||||
ppu.raddr = 0;
|
||||
|
|
|
|||
|
|
@ -31,7 +31,7 @@ spu_recompiler::spu_recompiler()
|
|||
}
|
||||
}
|
||||
|
||||
bool spu_recompiler::compile(std::shared_ptr<spu_function_contents_t> f)
|
||||
bool spu_recompiler::compile(spu_function_contents_t* f)
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(m_mutex);
|
||||
|
||||
|
|
@ -451,10 +451,12 @@ void spu_recompiler::LNOP(spu_opcode_t op)
|
|||
|
||||
void invalidate_jit(SPUThread* _spu)
|
||||
{
|
||||
for (auto& func : _spu->compiled_functions)
|
||||
for (u32 i = _spu->first_clean_func_index; i < _spu->last_clean_func_index; i++)
|
||||
{
|
||||
func->dirty_bit = true;
|
||||
_spu->compiled_functions[i].dirty_bit = true;
|
||||
}
|
||||
_spu->first_clean_func_index = -1;
|
||||
_spu->last_clean_func_index = 0;
|
||||
}
|
||||
|
||||
void spu_recompiler::SYNC(spu_opcode_t op)
|
||||
|
|
@ -468,6 +470,8 @@ void spu_recompiler::DSYNC(spu_opcode_t op)
|
|||
{
|
||||
// This instruction forces all earlier load, store, and channel instructions to complete before proceeding.
|
||||
c->mfence();
|
||||
asmjit::CCFuncCall* call = c->call(asmjit::imm_ptr(asmjit::Internal::ptr_cast<void*, void(SPUThread*)>(invalidate_jit)), asmjit::FuncSignature1<u32, SPUThread*>(asmjit::CallConv::kIdHost));
|
||||
call->setArg(0, *cpu);
|
||||
}
|
||||
|
||||
void spu_recompiler::MFSPR(spu_opcode_t op)
|
||||
|
|
|
|||
|
|
@ -15,7 +15,7 @@ class spu_recompiler : public spu_recompiler_base
|
|||
public:
|
||||
spu_recompiler();
|
||||
|
||||
virtual bool compile(std::shared_ptr<spu_function_contents_t> f) override;
|
||||
virtual bool compile(spu_function_contents_t* f) override;
|
||||
|
||||
private:
|
||||
// emitter:
|
||||
|
|
|
|||
|
|
@ -5,16 +5,16 @@
|
|||
|
||||
const spu_decoder<spu_itype> s_spu_itype;
|
||||
|
||||
std::shared_ptr<spu_function_contents_t> SPUDatabase::find(const be_t<u32>* data, u64 key, u32 max_size, void* ignore)
|
||||
spu_function_contents_t* SPUDatabase::find(const be_t<u32>* data, u64 key, u32 max_size, void* ignore)
|
||||
{
|
||||
for (auto found = m_db.equal_range(key); found.first != found.second; found.first++)
|
||||
{
|
||||
const auto & func = found.first->second;
|
||||
|
||||
// TODO remove code after a while if it hasn't been touched, else there's a big memory bloat here
|
||||
// TODO remove code after a while if it hasn't been touched, else there's a big memory bloat here and switch memcmp with compare_func
|
||||
|
||||
// Compare binary data explicitly (TODO: optimize)
|
||||
if (func.get() != ignore && LIKELY(func->size <= max_size) && memcmp(func->data.data(), data, func->size) == 0)
|
||||
if (func != ignore && LIKELY(func->size <= max_size) && std::memcmp(func->data.data(), data, func->size) == 0)
|
||||
{
|
||||
return func;
|
||||
}
|
||||
|
|
@ -35,7 +35,15 @@ SPUDatabase::~SPUDatabase()
|
|||
// TODO: serialize database
|
||||
}
|
||||
|
||||
std::shared_ptr<spu_function_contents_t> SPUDatabase::analyse(const be_t<u32>* ls, u32 entry, void* ignore /*=nullptr*/)
|
||||
// Tells whether an SPU instruction type is one of the branch kinds listed
// in the table below (BR, BRSL, BRA, BRASL, BI, BISL, IRET).
//
// type - decoded instruction type to classify.
//
// Returns true when `type` equals any table entry, false otherwise.
bool IsDeterministicBranch(spu_itype::type type)
{
	// Same set of instruction types as the original chained comparison.
	const spu_itype::type branch_kinds[] =
	{
		spu_itype::BR, spu_itype::BRSL,
		spu_itype::BRA, spu_itype::BRASL,
		spu_itype::BI, spu_itype::BISL,
		spu_itype::IRET,
	};

	for (const spu_itype::type kind : branch_kinds)
	{
		if (type == kind)
		{
			return true;
		}
	}

	return false;
}
|
||||
|
||||
spu_function_contents_t* SPUDatabase::analyse(const be_t<u32>* ls, u32 entry, void* ignore /*=nullptr*/)
|
||||
{
|
||||
const u32 max_limit = 0x40000;
|
||||
// Check arguments (bounds and alignment)
|
||||
|
|
@ -77,6 +85,8 @@ std::shared_ptr<spu_function_contents_t> SPUDatabase::analyse(const be_t<u32>* l
|
|||
// Initialize block entries with the function entry point
|
||||
std::set<u32> blocks{ entry };
|
||||
|
||||
std::vector<u32> blocks_size;
|
||||
|
||||
// Entries of adjacent functions; jump table entries
|
||||
std::set<u32> adjacent, jt;
|
||||
|
||||
|
|
@ -321,8 +331,27 @@ std::shared_ptr<spu_function_contents_t> SPUDatabase::analyse(const be_t<u32>* l
|
|||
return nullptr;
|
||||
}
|
||||
|
||||
blocks_size.reserve(blocks.size());
|
||||
for (u32 block : blocks)
|
||||
{
|
||||
u32 size = 0;
|
||||
for (u32 i = block / 4; i < 0x10000; i++)
|
||||
{
|
||||
if (ls[i] == 0 || IsDeterministicBranch(s_spu_itype.decode(ls[i])))
|
||||
{
|
||||
size = (i * 4) - block + 4;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (size == 0)
|
||||
{
|
||||
verify("No way out of a block"), size != 0;
|
||||
}
|
||||
blocks_size.push_back(size);
|
||||
}
|
||||
|
||||
// Prepare new function (set addr and size)
|
||||
auto func = std::make_shared<spu_function_contents_t>(entry, limit - entry);
|
||||
auto func = new spu_function_contents_t(entry, limit - entry);
|
||||
|
||||
// Copy function contents
|
||||
func->data = { ls + entry / 4, ls + limit / 4 };
|
||||
|
|
@ -354,6 +383,8 @@ std::shared_ptr<spu_function_contents_t> SPUDatabase::analyse(const be_t<u32>* l
|
|||
}
|
||||
}
|
||||
|
||||
func->blocks_size = std::move(blocks_size);
|
||||
|
||||
// Set whether the function can reset stack
|
||||
func->does_reset_stack = ila_sp_pos < limit;
|
||||
|
||||
|
|
@ -366,7 +397,7 @@ std::shared_ptr<spu_function_contents_t> SPUDatabase::analyse(const be_t<u32>* l
|
|||
m_db.emplace(key, func);
|
||||
}
|
||||
|
||||
LOG_NOTICE(SPU, "Function detected [0x%05x-0x%05x] (size=0x%x)", func->addr, func->addr + func->size, func->size);
|
||||
LOG_FATAL(SPU, "Function detected [0x%05x-0x%05x] (size=0x%x)", func->addr, func->addr + func->size, func->size);
|
||||
|
||||
return func;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -265,6 +265,9 @@ struct spu_function_contents_t
|
|||
// Basic blocks (start addresses)
|
||||
std::set<u32> blocks;
|
||||
|
||||
// Basic blocks size
|
||||
std::vector<u32> blocks_size;
|
||||
|
||||
// Functions possibly called by this function (may not be available)
|
||||
std::set<u32> adjacent;
|
||||
|
||||
|
|
@ -288,7 +291,7 @@ struct spu_function_contents_t
|
|||
union spu_function_t
|
||||
{
|
||||
// The function itself and its data
|
||||
std::shared_ptr<spu_function_contents_t> contents;
|
||||
spu_function_contents_t * contents;
|
||||
|
||||
// Whether pages the function is in were written to since its last execution
|
||||
bool dirty_bit : 1;
|
||||
|
|
@ -299,7 +302,7 @@ union spu_function_t
|
|||
}
|
||||
|
||||
spu_function_t() : contents(nullptr) {};
|
||||
~spu_function_t() {dirty_bit = false; contents.reset();};
|
||||
~spu_function_t() {};
|
||||
};
|
||||
|
||||
// SPU Function Database (must be global or PS3 process-local)
|
||||
|
|
@ -308,15 +311,15 @@ class SPUDatabase final : spu_itype
|
|||
shared_mutex m_mutex;
|
||||
|
||||
// All registered functions (uses addr and first instruction as a key)
|
||||
std::unordered_multimap<u64, std::shared_ptr<spu_function_contents_t>> m_db;
|
||||
std::unordered_multimap<u64, spu_function_contents_t*> m_db;
|
||||
|
||||
// For internal use
|
||||
std::shared_ptr<spu_function_contents_t> find(const be_t<u32>* data, u64 key, u32 max_size, void* ignore = nullptr);
|
||||
spu_function_contents_t* find(const be_t<u32>* data, u64 key, u32 max_size, void* ignore = nullptr);
|
||||
|
||||
public:
|
||||
SPUDatabase();
|
||||
~SPUDatabase();
|
||||
|
||||
// Try to retrieve SPU function information
|
||||
std::shared_ptr<spu_function_contents_t> analyse(const be_t<u32>* ls, u32 entry, void * ignore=nullptr);
|
||||
spu_function_contents_t* analyse(const be_t<u32>* ls, u32 entry, void * ignore=nullptr);
|
||||
};
|
||||
|
|
|
|||
|
|
@ -102,7 +102,7 @@ void spu_interpreter::RDCH(SPUThread& spu, spu_opcode_t op)
|
|||
else
|
||||
{
|
||||
memset(&spu.gpr[op.rt], 0, 3*sizeof(u32));
|
||||
spu.gpr[op.rt]._u32[3] = result; v128::from32r(result);
|
||||
spu.gpr[op.rt]._u32[3] = result;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -24,46 +24,53 @@ void spu_recompiler_base::enter(SPUThread& spu)
|
|||
const auto _ls = vm::_ptr<u32>(spu.offset);
|
||||
|
||||
// Search if cached data matches
|
||||
auto & func = spu.compiled_cache[spu.pc / 4];
|
||||
auto func_ptr = spu.compiled_cache[spu.pc / 4];
|
||||
|
||||
// func.contents is there only as a temporary test, to see if SPU codfe not getting invalidated is the reason for the crashes
|
||||
if (func.contents || func.dirty_bit)
|
||||
// func.contents is there only as a temporary test, to see if SPU code not getting invalidated is the reason for the crashes
|
||||
if (func_ptr && func_ptr->dirty_bit)
|
||||
{
|
||||
auto & func = *func_ptr;
|
||||
func.dirty_bit = false;
|
||||
|
||||
// This memcmp acts as a fast path instead of finding it again in analyse.
|
||||
if (memcmp(func.contents->data.data(), _ls + (spu.pc / 4), func.contents->size) != 0)
|
||||
u32 index = (reinterpret_cast<size_t>(func_ptr) - reinterpret_cast<size_t>(&spu.compiled_functions[0])) / sizeof(func);
|
||||
spu.first_clean_func_index = std::min<u32>(index, spu.first_clean_func_index);
|
||||
spu.last_clean_func_index = std::max<u32>(index + 1, spu.last_clean_func_index);
|
||||
|
||||
if (!spu.same_function(func.contents, _ls + (spu.pc / 4)))
|
||||
{
|
||||
func.contents = spu.spu_db->analyse(_ls, spu.pc, func.contents.get());
|
||||
func.contents = spu.spu_db->analyse(_ls, spu.pc, func.contents);
|
||||
}
|
||||
}
|
||||
else if (!func)
|
||||
else if (!func_ptr)
|
||||
{
|
||||
auto & func = spu.compiled_functions[++spu.next_compiled_func_index];
|
||||
func.contents = spu.spu_db->analyse(_ls, spu.pc);
|
||||
spu.compiled_functions.push_back(&func);
|
||||
func_ptr = &func;
|
||||
spu.compiled_cache[spu.pc / 4] = func_ptr;
|
||||
spu.last_clean_func_index = spu.next_compiled_func_index + 1;
|
||||
spu.first_clean_func_index = std::min<u32>(spu.first_clean_func_index, spu.next_compiled_func_index);
|
||||
}
|
||||
|
||||
// Reset callstack if necessary
|
||||
if ((func.contents->does_reset_stack && spu.recursion_level) || spu.recursion_level >= 128)
|
||||
if ((func_ptr->contents->does_reset_stack && spu.recursion_level) || spu.recursion_level >= 128)
|
||||
{
|
||||
spu.state += cpu_flag::ret;
|
||||
return;
|
||||
}
|
||||
|
||||
// Compile if needed
|
||||
if (!func.contents->compiled)
|
||||
if (!func_ptr->contents->compiled)
|
||||
{
|
||||
if (!spu.spu_rec)
|
||||
{
|
||||
spu.spu_rec = fxm::get_always<spu_recompiler>();
|
||||
}
|
||||
|
||||
spu.spu_rec->compile(func.contents);
|
||||
spu.spu_rec->compile(func_ptr->contents);
|
||||
|
||||
if (!func.contents->compiled) fmt::throw_exception("Compilation failed" HERE);
|
||||
if (!func_ptr->contents->compiled) fmt::throw_exception("Compilation failed" HERE);
|
||||
}
|
||||
|
||||
const u32 res = func.contents->compiled(&spu, _ls);
|
||||
const u32 res = func_ptr->contents->compiled(&spu, _ls);
|
||||
|
||||
if (spu.pending_exception)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -10,7 +10,7 @@ class spu_recompiler_base
|
|||
protected:
|
||||
std::mutex m_mutex; // must be locked in compile()
|
||||
|
||||
std::shared_ptr<const spu_function_contents_t> m_func; // current function
|
||||
const spu_function_contents_t* m_func; // current function
|
||||
|
||||
u32 m_pos; // current position
|
||||
|
||||
|
|
@ -18,7 +18,7 @@ public:
|
|||
virtual ~spu_recompiler_base();
|
||||
|
||||
// Compile specified function
|
||||
virtual bool compile(std::shared_ptr<spu_function_contents_t>) = 0;
|
||||
virtual bool compile(spu_function_contents_t*) = 0;
|
||||
|
||||
// Run
|
||||
static void enter(class SPUThread&);
|
||||
|
|
|
|||
|
|
@ -557,14 +557,15 @@ void SPUThread::do_dma_transfer(const spu_mfc_cmd& args, bool from_mfc)
|
|||
|
||||
if (spu)
|
||||
{
|
||||
for (auto& func : spu->compiled_functions)
|
||||
for (u32 i = 0; i < spu->next_compiled_func_index; i++)
|
||||
{
|
||||
auto faddr = func->contents->addr;
|
||||
auto fsize = func->contents->size;
|
||||
auto& func = spu->compiled_functions[i];
|
||||
auto faddr = func.contents->addr;
|
||||
auto fsize = func.contents->size;
|
||||
|
||||
if (faddr >= eal && faddr + fsize < eal + args.size)
|
||||
{
|
||||
func->dirty_bit = true;
|
||||
func.dirty_bit = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -639,6 +640,7 @@ void SPUThread::process_mfc_cmd()
|
|||
}
|
||||
|
||||
waiter->remove();
|
||||
waiter = nullptr;
|
||||
}
|
||||
else if (s_use_rtm && utils::transaction_enter())
|
||||
{
|
||||
|
|
@ -820,18 +822,22 @@ void SPUThread::process_mfc_cmd()
|
|||
// Try to process small transfers immediately
|
||||
if (ch_mfc_cmd.size <= max_imm_dma_size && mfc_queue.size() == 0)
|
||||
{
|
||||
vm::reader_lock lock(vm::try_to_lock);
|
||||
|
||||
if (!lock)
|
||||
/* TODO catch the exception (Currently they are ignored and slow things down by grabbing a lock)
|
||||
{
|
||||
break;
|
||||
}
|
||||
vm::reader_lock lock(vm::try_to_lock);
|
||||
|
||||
if (!vm::check_addr(ch_mfc_cmd.eal, ch_mfc_cmd.size, vm::page_readable | (ch_mfc_cmd.cmd & MFC_PUT_CMD ? vm::page_writable : 0)))
|
||||
{
|
||||
// TODO
|
||||
break;
|
||||
if (!lock)
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
if (!vm::check_addr(ch_mfc_cmd.eal, ch_mfc_cmd.size, vm::page_readable | (ch_mfc_cmd.cmd & MFC_PUT_CMD ? vm::page_writable : 0)))
|
||||
{
|
||||
// TODO
|
||||
break;
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
do_dma_transfer(ch_mfc_cmd, false);
|
||||
return;
|
||||
|
|
@ -851,13 +857,6 @@ void SPUThread::process_mfc_cmd()
|
|||
{
|
||||
if (ch_mfc_cmd.size <= max_imm_dma_size && mfc_queue.size() == 0)
|
||||
{
|
||||
vm::reader_lock lock(vm::try_to_lock);
|
||||
|
||||
if (!lock)
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
struct list_element
|
||||
{
|
||||
be_t<u16> sb;
|
||||
|
|
@ -888,11 +887,20 @@ void SPUThread::process_mfc_cmd()
|
|||
break;
|
||||
}
|
||||
|
||||
if (!vm::check_addr(addr, size, vm::page_readable | (ch_mfc_cmd.cmd & MFC_PUT_CMD ? vm::page_writable : 0)))
|
||||
/* TODO Catch and handle exceptions here
|
||||
{
|
||||
// TODO
|
||||
break;
|
||||
}
|
||||
vm::reader_lock lock(vm::try_to_lock);
|
||||
|
||||
if (!lock)
|
||||
{
|
||||
break;
|
||||
}
|
||||
if (!vm::check_addr(addr, size, vm::page_readable | (ch_mfc_cmd.cmd & MFC_PUT_CMD ? vm::page_writable : 0)))
|
||||
{
|
||||
// TODO
|
||||
break;
|
||||
}
|
||||
}*/
|
||||
|
||||
spu_mfc_cmd transfer;
|
||||
transfer.eal = addr;
|
||||
|
|
@ -1184,6 +1192,7 @@ bool SPUThread::get_ch_value(u32 ch, u32& out)
|
|||
if (test(state & cpu_flag::stop))
|
||||
{
|
||||
waiter->remove();
|
||||
waiter = nullptr;
|
||||
return false;
|
||||
}
|
||||
|
||||
|
|
@ -1193,6 +1202,7 @@ bool SPUThread::get_ch_value(u32 ch, u32& out)
|
|||
if (waiter != nullptr)
|
||||
{
|
||||
waiter->remove();
|
||||
waiter = nullptr;
|
||||
}
|
||||
|
||||
out = res;
|
||||
|
|
@ -1826,6 +1836,22 @@ bool SPUThread::stop_and_signal(u32 code)
|
|||
}
|
||||
}
|
||||
|
||||
bool SPUThread::same_function(const spu_function_contents_t * func, const void * addr)
|
||||
{
|
||||
auto size = func->blocks_size.cbegin();
|
||||
auto dst = reinterpret_cast<const u8 *>(addr);
|
||||
auto src = vm::ps3::_ptr<u8>(offset);
|
||||
|
||||
for (auto block : func->blocks)
|
||||
{
|
||||
u32 offset = block - func->addr;
|
||||
if (memcmp(src + offset, dst + offset, *size) != 0) return false;
|
||||
size++;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void SPUThread::halt()
|
||||
{
|
||||
LOG_TRACE(SPU, "halt()");
|
||||
|
|
|
|||
|
|
@ -215,8 +215,7 @@ public:
|
|||
{
|
||||
const auto old = data.fetch_op([](sync_var_t& data)
|
||||
{
|
||||
sync_var_t t;
|
||||
*reinterpret_cast<u64*>(&t) = 0;
|
||||
sync_var_t t{};
|
||||
t.wait = !data.count;
|
||||
data = t;
|
||||
});
|
||||
|
|
@ -582,8 +581,13 @@ public:
|
|||
|
||||
// No need for shared_ptr in the following two, as whenever something is removed or added to one,
|
||||
// the same goes for the other.
|
||||
std::array<spu_function_t, 65536> compiled_cache{};
|
||||
std::vector<spu_function_t*> compiled_functions{};
|
||||
std::array<spu_function_t*, 65536> compiled_cache{};
|
||||
std::array<spu_function_t, 65536> compiled_functions{};
|
||||
u32 next_compiled_func_index = -1;
|
||||
u32 first_clean_func_index = 0;
|
||||
u32 last_clean_func_index = 0;
|
||||
|
||||
|
||||
std::shared_ptr<class SPUDatabase> spu_db;
|
||||
std::shared_ptr<class spu_recompiler_base> spu_rec;
|
||||
u32 recursion_level = 0;
|
||||
|
|
@ -599,6 +603,7 @@ public:
|
|||
bool get_ch_value(u32 ch, u32& out);
|
||||
bool set_ch_value(u32 ch, u32 value);
|
||||
bool stop_and_signal(u32 code);
|
||||
bool same_function(const spu_function_contents_t * func, const void * addr);
|
||||
void halt();
|
||||
|
||||
void fast_call(u32 ls_addr);
|
||||
|
|
|
|||
|
|
@ -777,6 +777,12 @@ error_code sys_spu_thread_write_ls(u32 id, u32 lsa, u64 value, u32 type)
|
|||
default: return CELL_EINVAL;
|
||||
}
|
||||
|
||||
auto func = thread->compiled_cache[lsa / 4];
|
||||
if (func)
|
||||
{
|
||||
func->dirty_bit = true;
|
||||
}
|
||||
|
||||
return CELL_OK;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -131,25 +131,20 @@ u64 get_timebased_time()
|
|||
// Returns some relative time in microseconds, don't change this fact
|
||||
u64 get_system_time()
|
||||
{
|
||||
#ifdef _WIN32
|
||||
// Pull the time directly from Windows shared page (Constant location on all Windows machines)
|
||||
return *reinterpret_cast<u64*>(0x7ffe0014) / 10;
|
||||
#else
|
||||
while (true)
|
||||
{
|
||||
#ifdef _WIN32
|
||||
LARGE_INTEGER count;
|
||||
verify(HERE), QueryPerformanceCounter(&count);
|
||||
|
||||
const u64 time = count.QuadPart;
|
||||
const u64 freq = s_time_aux_info.perf_freq;
|
||||
|
||||
const u64 result = time / freq * 1000000u + (time % freq) * 1000000u / freq;
|
||||
#else
|
||||
struct timespec ts;
|
||||
verify(HERE), ::clock_gettime(CLOCK_MONOTONIC, &ts) == 0;
|
||||
|
||||
const u64 result = static_cast<u64>(ts.tv_sec) * 1000000u + static_cast<u64>(ts.tv_nsec) / 1000u;
|
||||
#endif
|
||||
|
||||
if (result) return result;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
// Functions
|
||||
|
|
|
|||
|
|
@ -35,7 +35,7 @@ void fmt_class_string<frame_limit_type>::format(std::string& out, u64 arg)
|
|||
namespace rsx
|
||||
{
|
||||
rsx_state method_registers;
|
||||
|
||||
|
||||
std::array<rsx_method_t, 0x10000 / 4> methods{};
|
||||
|
||||
void invalid_method(thread* rsx, u32 _reg, u32 arg)
|
||||
|
|
@ -64,10 +64,10 @@ namespace rsx
|
|||
{
|
||||
rsx->sync_point_request = true;
|
||||
const u32 addr = get_address(method_registers.semaphore_offset_406e(), method_registers.semaphore_context_dma_406e());
|
||||
if (vm::read32(addr) == arg) return;
|
||||
if (vm::ps3::read32(addr) == arg) return;
|
||||
|
||||
u64 start = get_system_time();
|
||||
while (vm::read32(addr) != arg)
|
||||
while (vm::ps3::read32(addr) != arg)
|
||||
{
|
||||
// todo: LLE: why does this one keep hanging? is it vsh system semaphore? whats actually pushing this to the command buffer?!
|
||||
if (addr == 0x40000030)
|
||||
|
|
@ -116,12 +116,11 @@ namespace rsx
|
|||
if (addr >> 28 == 0x4)
|
||||
{
|
||||
// TODO: check no reservation area instead
|
||||
vm::write32(addr, arg);
|
||||
vm::ps3::write32(addr, arg);
|
||||
return;
|
||||
}
|
||||
|
||||
vm::reader_lock lock;
|
||||
vm::write32(addr, arg);
|
||||
vm::ps3::write32(addr, arg);
|
||||
vm::notify(addr, 4);
|
||||
}
|
||||
}
|
||||
|
|
@ -162,7 +161,7 @@ namespace rsx
|
|||
{
|
||||
//
|
||||
}
|
||||
auto& sema = vm::_ref<RsxReports>(rsx->label_addr);
|
||||
auto& sema = vm::ps3::_ref<RsxReports>(rsx->label_addr);
|
||||
sema.semaphore[index].val = arg;
|
||||
sema.semaphore[index].pad = 0;
|
||||
sema.semaphore[index].timestamp = rsx->timestamp();
|
||||
|
|
@ -177,7 +176,7 @@ namespace rsx
|
|||
}
|
||||
u32 val = (arg & 0xff00ff00) | ((arg & 0xff) << 16) | ((arg >> 16) & 0xff);
|
||||
|
||||
auto& sema = vm::_ref<RsxReports>(rsx->label_addr);
|
||||
auto& sema = vm::ps3::_ref<RsxReports>(rsx->label_addr);
|
||||
sema.semaphore[index].val = val;
|
||||
sema.semaphore[index].pad = 0;
|
||||
sema.semaphore[index].timestamp = rsx->timestamp();
|
||||
|
|
@ -352,7 +351,7 @@ namespace rsx
|
|||
{
|
||||
rsx::method_registers.current_draw_clause.first_count_commands.resize(0);
|
||||
rsx::method_registers.current_draw_clause.command = draw_command::none;
|
||||
rsx::method_registers.current_draw_clause.primitive = to_primitive_type(arg);
|
||||
rsx::method_registers.current_draw_clause.primitive = rsx::method_registers.primitive_mode();
|
||||
rsxthr->begin();
|
||||
return;
|
||||
}
|
||||
|
|
@ -416,7 +415,7 @@ namespace rsx
|
|||
return;
|
||||
}
|
||||
|
||||
vm::ptr<CellGcmReportData> result = address_ptr;
|
||||
vm::ps3::ptr<CellGcmReportData> result = address_ptr;
|
||||
|
||||
switch (type)
|
||||
{
|
||||
|
|
@ -484,7 +483,7 @@ namespace rsx
|
|||
return;
|
||||
}
|
||||
|
||||
vm::ptr<CellGcmReportData> result = address_ptr;
|
||||
vm::ps3::ptr<CellGcmReportData> result = address_ptr;
|
||||
rsx->conditional_render_test_failed = (result->value == 0);
|
||||
}
|
||||
|
||||
|
|
@ -553,7 +552,7 @@ namespace rsx
|
|||
|
||||
const u32 pixel_offset = (method_registers.blit_engine_output_pitch_nv3062() * y) + (x << 2);
|
||||
u32 address = get_address(method_registers.blit_engine_output_offset_nv3062() + pixel_offset + index * 4, method_registers.blit_engine_output_location_nv3062());
|
||||
vm::write32(address, arg);
|
||||
vm::ps3::write32(address, arg);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
|
@ -662,7 +661,7 @@ namespace rsx
|
|||
const tiled_region dst_region = rsx->get_tiled_address(dst_offset + out_offset, dst_dma & 0xf);
|
||||
|
||||
u8* pixels_src = src_region.tile ? src_region.ptr + src_region.base : src_region.ptr;
|
||||
u8* pixels_dst = vm::_ptr<u8>(get_address(dst_offset + out_offset, dst_dma));
|
||||
u8* pixels_dst = vm::ps3::_ptr<u8>(get_address(dst_offset + out_offset, dst_dma));
|
||||
|
||||
if (out_pitch == 0)
|
||||
{
|
||||
|
|
@ -694,8 +693,7 @@ namespace rsx
|
|||
|
||||
if (convert_w == 0 || convert_h == 0)
|
||||
{
|
||||
LOG_ERROR(RSX, "NV3089_IMAGE_IN: Invalid dimensions or scaling factor. Request ignored (ds_dx=%d, dt_dy=%d)",
|
||||
method_registers.blit_engine_ds_dx(), method_registers.blit_engine_dt_dy());
|
||||
LOG_ERROR(RSX, "NV3089_IMAGE_IN: Invalid dimensions or scaling factor. Request ignored");
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
@ -790,7 +788,7 @@ namespace rsx
|
|||
if (method_registers.blit_engine_context_surface() != blit_engine::context_surface::swizzle2d)
|
||||
{
|
||||
if (need_convert || need_clip)
|
||||
{
|
||||
{
|
||||
if (need_clip)
|
||||
{
|
||||
if (need_convert)
|
||||
|
|
@ -856,7 +854,7 @@ namespace rsx
|
|||
pixels_src = temp2.get();
|
||||
}
|
||||
|
||||
// It looks like rsx may ignore the requested swizzle size and just always
|
||||
// It looks like rsx may ignore the requested swizzle size and just always
|
||||
// round up to nearest power of 2
|
||||
/*u8 sw_width_log2 = method_registers.nv309e_sw_width_log2();
|
||||
u8 sw_height_log2 = method_registers.nv309e_sw_height_log2();
|
||||
|
|
@ -934,7 +932,7 @@ namespace rsx
|
|||
LOG_ERROR(RSX, "NV0039_OFFSET_IN: Unsupported format: inFormat=%d, outFormat=%d", in_format, out_format);
|
||||
}
|
||||
|
||||
LOG_TRACE(RSX, "NV0039_OFFSET_IN: pitch(in=0x%x, out=0x%x), line(len=0x%x, cnt=0x%x), fmt(in=0x%x, out=0x%x), notify=0x%x",
|
||||
LOG_NOTICE(RSX, "NV0039_OFFSET_IN: pitch(in=0x%x, out=0x%x), line(len=0x%x, cnt=0x%x), fmt(in=0x%x, out=0x%x), notify=0x%x",
|
||||
in_pitch, out_pitch, line_length, line_count, in_format, out_format, notify);
|
||||
|
||||
if (!in_pitch)
|
||||
|
|
@ -1028,7 +1026,7 @@ namespace rsx
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
rsx->int_flip_index++;
|
||||
rsx->current_display_buffer = arg;
|
||||
rsx->flip(arg);
|
||||
|
|
@ -1190,11 +1188,6 @@ namespace rsx
|
|||
registers[reg] = value;
|
||||
}
|
||||
|
||||
bool rsx_state::test(u32 reg, u32 value) const
|
||||
{
|
||||
return registers[reg] == value;
|
||||
}
|
||||
|
||||
namespace method_detail
|
||||
{
|
||||
template<int Id, int Step, int Count, template<u32> class T, int Index = 0>
|
||||
|
|
@ -1567,7 +1560,7 @@ namespace rsx
|
|||
|
||||
//Some custom GCM methods
|
||||
methods[GCM_SET_DRIVER_OBJECT] = nullptr;
|
||||
|
||||
|
||||
bind_array<GCM_FLIP_HEAD, 1, 2, nullptr>();
|
||||
bind_array<GCM_DRIVER_QUEUE, 1, 8, nullptr>();
|
||||
|
||||
|
|
@ -1684,8 +1677,8 @@ namespace rsx
|
|||
|
||||
// custom methods
|
||||
bind<GCM_FLIP_COMMAND, flip_command>();
|
||||
|
||||
|
||||
|
||||
return true;
|
||||
return true;
|
||||
}();
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue