#pragma once #include "Utilities/File.h" #include "Utilities/mutex.h" #include "Utilities/cond.h" #include "Utilities/JIT.h" #include "SPUThread.h" #include #include #include #include #include // Helper class class spu_cache { fs::file m_file; public: spu_cache(const std::string& loc); ~spu_cache(); operator bool() const { return m_file.operator bool(); } std::deque> get(); void add(const std::vector& func); static void initialize(); }; // Helper class class spu_runtime { mutable shared_mutex m_mutex; mutable cond_variable m_cond; mutable atomic_t m_passive_locks{0}; atomic_t m_reset_count{0}; struct func_compare { // Comparison function for SPU programs bool operator()(const std::vector& lhs, const std::vector& rhs) const; }; // All functions std::map, spu_function_t, func_compare> m_map; // All functions as PIC std::map, spu_function_t> m_pic_map; // Debug module output location std::string m_cache_path; // Scratch vector std::vector, spu_function_t>> m_flat_list; public: // Trampoline to spu_recompiler_base::dispatch static const spu_function_t tr_dispatch; // Trampoline to spu_recompiler_base::branch static const spu_function_t tr_branch; public: spu_runtime(); const std::string& get_cache_path() const { return m_cache_path; } // Add compiled function and generate trampoline if necessary bool add(u64 last_reset_count, void* where, spu_function_t compiled); // Return opaque pointer for add() void* find(u64 last_reset_count, const std::vector&); // Find existing function spu_function_t find(const u32* ls, u32 addr) const; // Generate a patchable trampoline to spu_recompiler_base::branch spu_function_t make_branch_patchpoint() const; // reset() arg retriever, for race avoidance (can result in double reset) u64 get_reset_count() const { return m_reset_count.load(); } // Remove all compiled function and free JIT memory u64 reset(std::size_t last_reset_count); // Handle cpu_flag::jit_return void handle_return(spu_thread* _spu); // All dispatchers (array allocated in jit memory) static atomic_t* const g_dispatcher; // Recompiler entry point static const spu_function_t g_gateway; // Longjmp to the end of the gateway function (native CC) static void(*const g_escape)(spu_thread*); // Similar to g_escape, but doing tail call to the new function. static void(*const g_tail_escape)(spu_thread*, spu_function_t, u8*); // Interpreter entry point static spu_function_t g_interpreter; struct passive_lock { spu_runtime& _this; passive_lock(const passive_lock&) = delete; passive_lock(spu_runtime& _this) : _this(_this) { std::lock_guard lock(_this.m_mutex); _this.m_passive_locks++; } ~passive_lock() { _this.m_passive_locks--; } }; // Exclusive lock within passive_lock scope struct writer_lock { spu_runtime& _this; bool notify = false; writer_lock(const writer_lock&) = delete; writer_lock(spu_runtime& _this) : _this(_this) { // Temporarily release the passive lock _this.m_passive_locks--; _this.m_mutex.lock(); } ~writer_lock() { _this.m_passive_locks++; _this.m_mutex.unlock(); if (notify) { _this.m_cond.notify_all(); } } }; struct reader_lock { const spu_runtime& _this; reader_lock(const reader_lock&) = delete; reader_lock(const spu_runtime& _this) : _this(_this) { _this.m_passive_locks--; _this.m_mutex.lock_shared(); } ~reader_lock() { _this.m_passive_locks++; _this.m_mutex.unlock_shared(); } }; }; // SPU Recompiler instance base class class spu_recompiler_base { public: enum : u8 { s_reg_lr = 0, s_reg_sp = 1, s_reg_80 = 80, s_reg_127 = 127, s_reg_mfc_eal, s_reg_mfc_lsa, s_reg_mfc_tag, s_reg_mfc_size, // Max number of registers (for m_regmod) s_reg_max }; // Classify terminator instructions enum class term_type : unsigned char { br, ret, call, fallthrough, indirect_call, interrupt_call, }; protected: std::shared_ptr m_spurt; u32 m_pos; u32 m_size; // Bit indicating start of the block std::bitset<0x10000> m_block_info; // GPR modified by the instruction (-1 = not set) std::array m_regmod; std::array m_use_ra; std::array m_use_rb; std::array m_use_rc; // List of possible targets for the instruction (entry shouldn't exist for simple instructions) std::unordered_map, value_hash> m_targets; // List of block predecessors std::unordered_map, value_hash> m_preds; // List of function entry points and return points (set after BRSL, BRASL, BISL, BISLED) std::bitset<0x10000> m_entry_info; // Set after return points and disjoint chunks std::bitset<0x10000> m_ret_info; // Basic block information struct block_info { // Address of the chunk entry point (chunk this block belongs to) u32 chunk = 0x40000; // Number of instructions u16 size = 0; // Internal use flag bool analysed = false; // Terminator instruction type term_type terminator; // Bit mask of the registers modified in the block std::bitset reg_mod{}; // Set if last modifying instruction produces xfloat std::bitset reg_mod_xf{}; // Set if the initial register value in this block may be xfloat std::bitset reg_maybe_xf{}; // Bit mask of the registers used (before modified) std::bitset reg_use{}; // Bit mask of the trivial (u32 x 4) constant value resulting in this block std::bitset reg_const{}; // Bit mask of register saved onto the stack before use std::bitset reg_save_dom{}; // Address of the function u32 func = 0x40000; // Value subtracted from $SP in this block, negative if something funny is done on $SP u32 stack_sub = 0; // Constant values associated with reg_const std::array reg_val32; // Registers loaded from the stack in this block (stack offset) std::array reg_load_mod{}; // Single source of the reg value (dominating block address within the same chunk) or a negative number std::array reg_origin, reg_origin_abs; // All possible successor blocks std::basic_string targets; // All predeccessor blocks std::basic_string preds; }; // Sorted basic block info std::map m_bbs; // Sorted advanced block (chunk) list std::basic_string m_chunks; // Function information struct func_info { // Size to the end of last basic block u16 size = 0; // Determines whether a function is eligible for optimizations bool good = false; // Call targets std::basic_string calls; // Register save info (stack offset) std::array reg_save_off{}; }; // Sorted function info std::map m_funcs; std::shared_ptr m_cache; private: // For private use std::bitset<0x10000> m_bits; // For private use std::vector workload; // Result of analyse(), to avoid copying and allocation std::vector result; public: spu_recompiler_base(); virtual ~spu_recompiler_base(); // Initialize virtual void init() = 0; // Compile function (may fail) virtual spu_function_t compile(u64 last_reset_count, const std::vector&) = 0; // Compile function, handle failure void make_function(const std::vector&); // Default dispatch function fallback (second arg is unused) static void dispatch(spu_thread&, void*, u8* rip); // Target for the unresolved patch point (second arg is unused) static void branch(spu_thread&, void*, u8* rip); // Get the function data at specified address const std::vector& analyse(const be_t* ls, u32 lsa); // Print analyser internal state void dump(std::string& out); // Get SPU Runtime spu_runtime& get_runtime() { if (!m_spurt) { init(); } return *m_spurt; } // Create recompiler instance (ASMJIT) static std::unique_ptr make_asmjit_recompiler(); // Create recompiler instance (LLVM) static std::unique_ptr make_llvm_recompiler(u8 magn = 0); };