From 402df6823513972d5fc35300a8f329173a9473f2 Mon Sep 17 00:00:00 2001 From: Nekotekina Date: Thu, 13 Apr 2017 18:35:49 +0300 Subject: [PATCH] sys_spu_image loading rewritten --- rpcs3/Emu/Cell/Modules/cellOvis.cpp | 2 +- rpcs3/Emu/Cell/Modules/cellSpurs.cpp | 6 +- rpcs3/Emu/Cell/Modules/cellSpurs.h | 2 +- rpcs3/Emu/Cell/Modules/sys_spu_.cpp | 39 ++++----- rpcs3/Emu/Cell/lv2/sys_spu.cpp | 113 +++++++++++++++++++-------- rpcs3/Emu/Cell/lv2/sys_spu.h | 28 +++---- 6 files changed, 109 insertions(+), 81 deletions(-) diff --git a/rpcs3/Emu/Cell/Modules/cellOvis.cpp b/rpcs3/Emu/Cell/Modules/cellOvis.cpp index f950616066..6600b38f7a 100644 --- a/rpcs3/Emu/Cell/Modules/cellOvis.cpp +++ b/rpcs3/Emu/Cell/Modules/cellOvis.cpp @@ -26,7 +26,7 @@ s32 cellOvisInitializeOverlayTable(vm::ptr ea_ovly_table, vm::cptr e return CELL_OK; } -void cellOvisFixSpuSegments(vm::ptr image) +void cellOvisFixSpuSegments(vm::ptr image) { cellOvis.todo("cellOvisFixSpuSegments(image=*0x%x)", image); } diff --git a/rpcs3/Emu/Cell/Modules/cellSpurs.cpp b/rpcs3/Emu/Cell/Modules/cellSpurs.cpp index bb705ff513..7f4608e85f 100644 --- a/rpcs3/Emu/Cell/Modules/cellSpurs.cpp +++ b/rpcs3/Emu/Cell/Modules/cellSpurs.cpp @@ -17,7 +17,7 @@ logs::channel cellSpurs("cellSpurs", logs::level::notice); -s32 sys_spu_image_close(vm::ptr img); +s32 sys_spu_image_close(vm::ptr img); // TODO struct cell_error_t @@ -1051,9 +1051,9 @@ s32 _spurs::initialize(ppu_thread& ppu, vm::ptr spurs, u32 revision, // Import SPURS kernel spurs->spuImg.type = SYS_SPU_IMAGE_TYPE_USER; - spurs->spuImg.segs = vm::cast(vm::alloc(0x40000, vm::main)); + spurs->spuImg.segs = vm::null; spurs->spuImg.entry_point = isSecond ? CELL_SPURS_KERNEL2_ENTRY_ADDR : CELL_SPURS_KERNEL1_ENTRY_ADDR; - spurs->spuImg.nsegs = 1; + spurs->spuImg.nsegs = 0; // Create a thread group for this SPURS context std::memcpy(spuTgName.get_ptr(), spurs->prefix, spurs->prefixSize); diff --git a/rpcs3/Emu/Cell/Modules/cellSpurs.h b/rpcs3/Emu/Cell/Modules/cellSpurs.h index 6a8a7bc46d..ba72027f7a 100644 --- a/rpcs3/Emu/Cell/Modules/cellSpurs.h +++ b/rpcs3/Emu/Cell/Modules/cellSpurs.h @@ -539,7 +539,7 @@ struct alignas(128) CellSpurs atomic_t handlerExiting; // 0xD66 atomic_be_t enableEH; // 0xD68 be_t exception; // 0xD6C - sys_spu_image_t spuImg; // 0xD70 + sys_spu_image spuImg; // 0xD70 be_t flags; // 0xD80 be_t spuPriority; // 0xD84 be_t ppuPriority; // 0xD88 diff --git a/rpcs3/Emu/Cell/Modules/sys_spu_.cpp b/rpcs3/Emu/Cell/Modules/sys_spu_.cpp index 2b33f8b8f5..36dd8ce03e 100644 --- a/rpcs3/Emu/Cell/Modules/sys_spu_.cpp +++ b/rpcs3/Emu/Cell/Modules/sys_spu_.cpp @@ -28,21 +28,17 @@ s32 sys_spu_elf_get_segments(u32 elf_img, vm::ptr segments, s32 return CELL_OK; } -s32 sys_spu_image_import(vm::ptr img, u32 src, u32 type) +s32 sys_spu_image_import(vm::ptr img, u32 src, u32 type) { sysPrxForUser.warning("sys_spu_image_import(img=*0x%x, src=0x%x, type=%d)", img, src, type); - u32 entry, offset = LoadSpuImage(fs::file(vm::base(src), 0 - src), entry); - - img->type = SYS_SPU_IMAGE_TYPE_USER; - img->entry_point = entry; - img->segs.set(offset); // TODO: writing actual segment info - img->nsegs = 1; // wrong value + // Load from memory (TODO) + img->load(fs::file{vm::base(src), 0 - src}); return CELL_OK; } -s32 sys_spu_image_close(vm::ptr img) +s32 sys_spu_image_close(vm::ptr img) { sysPrxForUser.warning("sys_spu_image_close(img=*0x%x)", img); @@ -59,7 +55,7 @@ s32 sys_spu_image_close(vm::ptr img) return CELL_EINVAL; } - verify(HERE), vm::dealloc(img->segs.addr(), vm::main); // Current rough implementation + img->free(); return CELL_OK; } @@ -75,33 +71,26 @@ s32 sys_raw_spu_load(s32 id, vm::cptr path, vm::ptr entry) return CELL_ENOENT; } - u32 _entry; - LoadSpuImage(elf_file, _entry, RAW_SPU_BASE_ADDR + RAW_SPU_OFFSET * id); + sys_spu_image img; + img.load(elf_file); + img.deploy(RAW_SPU_BASE_ADDR + RAW_SPU_OFFSET * id); + img.free(); - *entry = _entry | 1; + *entry = img.entry_point | 1; return CELL_OK; } -s32 sys_raw_spu_image_load(ppu_thread& ppu, s32 id, vm::ptr img) +s32 sys_raw_spu_image_load(ppu_thread& ppu, s32 id, vm::ptr img) { sysPrxForUser.warning("sys_raw_spu_image_load(id=%d, img=*0x%x)", id, img); - // TODO: use segment info - - const auto stamp0 = get_system_time(); - - std::memcpy(vm::base(RAW_SPU_BASE_ADDR + RAW_SPU_OFFSET * id), img->segs.get_ptr(), 256 * 1024); - - const auto stamp1 = get_system_time(); + // Load SPU segments + img->deploy(RAW_SPU_BASE_ADDR + RAW_SPU_OFFSET * id); + // Use MMIO vm::write32(RAW_SPU_BASE_ADDR + RAW_SPU_OFFSET * id + RAW_SPU_PROB_OFFSET + SPU_NPC_offs, img->entry_point | 1); - const auto stamp2 = get_system_time(); - - sysPrxForUser.error("memcpy() latency: %lldus", (stamp1 - stamp0)); - sysPrxForUser.error("MMIO latency: %lldus", (stamp2 - stamp1)); - return CELL_OK; } diff --git a/rpcs3/Emu/Cell/lv2/sys_spu.cpp b/rpcs3/Emu/Cell/lv2/sys_spu.cpp index b12d7da23c..d431c16a4a 100644 --- a/rpcs3/Emu/Cell/lv2/sys_spu.cpp +++ b/rpcs3/Emu/Cell/lv2/sys_spu.cpp @@ -18,7 +18,7 @@ namespace vm { using namespace ps3; } logs::channel sys_spu("sys_spu", logs::level::notice); -void LoadSpuImage(const fs::file& stream, u32& spu_ep, u32 addr) +void sys_spu_image::load(const fs::file& stream) { const spu_exec_object obj{stream}; @@ -27,6 +27,13 @@ void LoadSpuImage(const fs::file& stream, u32& spu_ep, u32 addr) fmt::throw_exception("Failed to load SPU image: %s" HERE, obj.get_error()); } + this->type = SYS_SPU_IMAGE_TYPE_KERNEL; + this->entry_point = obj.header.e_entry; + this->segs.set(vm::alloc(65 * 4096, vm::main)); + this->nsegs = 0; + + const u32 addr = this->segs.addr() + 4096; + sha1_context ctx; u8 output[20]; @@ -36,6 +43,8 @@ void LoadSpuImage(const fs::file& stream, u32& spu_ep, u32 addr) for (const auto& shdr : obj.shdrs) { sha1_update(&ctx, reinterpret_cast(&shdr), sizeof(spu_exec_object::shdr_t)); + + LOG_NOTICE(SPU, "** Section: sh_type=0x%x, addr=0x%llx, size=0x%llx, flags=0x%x", shdr.sh_type, shdr.sh_addr, shdr.sh_size, shdr.sh_flags); } for (const auto& prog : obj.progs) @@ -43,14 +52,40 @@ void LoadSpuImage(const fs::file& stream, u32& spu_ep, u32 addr) sha1_update(&ctx, reinterpret_cast(&prog), sizeof(spu_exec_object::phdr_t)); sha1_update(&ctx, reinterpret_cast(prog.bin.data()), prog.bin.size()); - if (prog.p_type == 0x1 /* LOAD */) + LOG_NOTICE(SPU, "** Segment: p_type=0x%x, p_vaddr=0x%llx, p_filesz=0x%llx, p_memsz=0x%llx, flags=0x%x", prog.p_type, prog.p_vaddr, prog.p_filesz, prog.p_memsz, prog.p_flags); + + if (prog.p_type == SYS_SPU_SEGMENT_TYPE_COPY) { - std::memcpy(vm::base(addr + prog.p_vaddr), prog.bin.data(), prog.p_filesz); + auto& seg = segs[nsegs++]; + seg.type = prog.p_type; + seg.ls = prog.p_vaddr; + seg.addr = addr + prog.p_vaddr; + seg.size = std::min(prog.p_filesz, prog.p_memsz); + std::memcpy(vm::base(seg.addr), prog.bin.data(), seg.size); + + if (prog.p_memsz > prog.p_filesz) + { + auto& zero = segs[nsegs++]; + zero.type = SYS_SPU_SEGMENT_TYPE_FILL; + zero.ls = prog.p_vaddr + prog.p_filesz; + zero.addr = 0; + zero.size = prog.p_memsz - seg.size; + } + } + else if (prog.p_type == SYS_SPU_SEGMENT_TYPE_INFO) + { + auto& seg = segs[nsegs++]; + seg.type = SYS_SPU_SEGMENT_TYPE_INFO; + seg.ls = prog.p_vaddr; + seg.addr = 0; + seg.size = prog.p_filesz; + } + else + { + LOG_ERROR(SPU, "Unknown program type (0x%x)", prog.p_type); } } - spu_ep = obj.header.e_entry; - sha1_finish(&ctx, output); // Format patch name @@ -68,13 +103,36 @@ void LoadSpuImage(const fs::file& stream, u32& spu_ep, u32 addr) } } -u32 LoadSpuImage(const fs::file& stream, u32& spu_ep) +void sys_spu_image::free() { - const u32 alloc_size = 256 * 1024; - u32 spu_offset = (u32)vm::alloc(alloc_size, vm::main); + if (type == SYS_SPU_IMAGE_TYPE_KERNEL) + { + vm::dealloc_verbose_nothrow(segs.addr(), vm::main); + } +} - LoadSpuImage(stream, spu_ep, spu_offset); - return spu_offset; +void sys_spu_image::deploy(u32 loc) +{ + for (int i = 0; i < nsegs; i++) + { + auto& seg = segs[i]; + + LOG_NOTICE(SPU, "*** Deploy: t=0x%x, ls=0x%x, size=0x%x, addr=0x%x", seg.type, seg.ls, seg.size, seg.addr); + + if (seg.type == SYS_SPU_SEGMENT_TYPE_COPY) + { + std::memcpy(vm::base(loc + seg.ls), vm::base(seg.addr), seg.size); + } + else if (seg.type == SYS_SPU_SEGMENT_TYPE_FILL) + { + if ((seg.ls | seg.size) % 4) + { + LOG_ERROR(SPU, "Unaligned SPU FILL type segment (ls=0x%x, size=0x%x)", seg.ls, seg.size); + } + + std::fill_n(vm::_ptr(loc + seg.ls), seg.size / 4, seg.addr); + } + } } error_code sys_spu_initialize(u32 max_usable_spu, u32 max_raw_spu) @@ -89,7 +147,7 @@ error_code sys_spu_initialize(u32 max_usable_spu, u32 max_raw_spu) return CELL_OK; } -error_code sys_spu_image_open(vm::ptr img, vm::cptr path) +error_code sys_spu_image_open(vm::ptr img, vm::cptr path) { sys_spu.warning("sys_spu_image_open(img=*0x%x, path=%s)", img, path); @@ -101,18 +159,12 @@ error_code sys_spu_image_open(vm::ptr img, vm::cptr path) return CELL_ENOENT; } - u32 entry; - u32 offset = LoadSpuImage(elf_file, entry); - - img->type = SYS_SPU_IMAGE_TYPE_USER; - img->entry_point = entry; - img->segs.set(offset); // TODO: writing actual segment info - img->nsegs = 1; // wrong value + img->load(elf_file); return CELL_OK; } -error_code sys_spu_thread_initialize(vm::ptr thread, u32 group_id, u32 spu_num, vm::ptr img, vm::ptr attr, vm::ptr arg) +error_code sys_spu_thread_initialize(vm::ptr thread, u32 group_id, u32 spu_num, vm::ptr img, vm::ptr attr, vm::ptr arg) { sys_spu.warning("sys_spu_thread_initialize(thread=*0x%x, group=0x%x, spu_num=%d, img=*0x%x, attr=*0x%x, arg=*0x%x)", thread, group_id, spu_num, img, attr, arg); @@ -151,7 +203,7 @@ error_code sys_spu_thread_initialize(vm::ptr thread, u32 group_id, u32 spu_ group->threads[spu_num] = std::move(spu); group->args[spu_num] = {arg->arg1, arg->arg2, arg->arg3, arg->arg4}; - group->images[spu_num] = img; + group->imgs[spu_num] = img; if (++group->init == group->num) { @@ -176,10 +228,7 @@ error_code sys_spu_thread_set_argument(u32 id, vm::ptr semaphore_lock lock(group->mutex); - group->args[thread->index].arg1 = arg->arg1; - group->args[thread->index].arg2 = arg->arg2; - group->args[thread->index].arg3 = arg->arg3; - group->args[thread->index].arg4 = arg->arg4; + group->args[thread->index] = {arg->arg1, arg->arg2, arg->arg3, arg->arg4}; return CELL_OK; } @@ -294,18 +343,16 @@ error_code sys_spu_thread_group_start(ppu_thread& ppu, u32 id) if (thread) { auto& args = group->args[thread->index]; - auto& image = group->images[thread->index]; + auto& img = group->imgs[thread->index]; - // Copy SPU image: - // TODO: use segment info - std::memcpy(vm::base(thread->offset), image->segs.get_ptr(), 256 * 1024); + img->deploy(thread->offset); - thread->pc = image->entry_point; + thread->pc = img->entry_point; thread->cpu_init(); - thread->gpr[3] = v128::from64(0, args.arg1); - thread->gpr[4] = v128::from64(0, args.arg2); - thread->gpr[5] = v128::from64(0, args.arg3); - thread->gpr[6] = v128::from64(0, args.arg4); + thread->gpr[3] = v128::from64(0, args[0]); + thread->gpr[4] = v128::from64(0, args[1]); + thread->gpr[5] = v128::from64(0, args[2]); + thread->gpr[6] = v128::from64(0, args[3]); thread->status.exchange(SPU_STATUS_RUNNING); } diff --git a/rpcs3/Emu/Cell/lv2/sys_spu.h b/rpcs3/Emu/Cell/lv2/sys_spu.h index 55b69da1b0..a7a3da3dd4 100644 --- a/rpcs3/Emu/Cell/lv2/sys_spu.h +++ b/rpcs3/Emu/Cell/lv2/sys_spu.h @@ -88,7 +88,7 @@ struct sys_spu_segment { be_t type; // copy, fill, info be_t ls; // local storage address - be_t size; + be_t size; union { @@ -105,12 +105,16 @@ enum : u32 SYS_SPU_IMAGE_TYPE_KERNEL = 1, }; -struct sys_spu_image_t +struct sys_spu_image { be_t type; // user, kernel be_t entry_point; vm::ps3::bptr segs; be_t nsegs; + + void load(const fs::file& stream); + void free(); + void deploy(u32 loc); }; enum : u32 @@ -119,14 +123,6 @@ enum : u32 SYS_SPU_IMAGE_DIRECT = 1, }; -struct spu_arg_t -{ - u64 arg1; - u64 arg2; - u64 arg3; - u64 arg4; -}; - // SPU Thread Group Join State Flag enum : u32 { @@ -157,8 +153,8 @@ struct lv2_spu_group cond_variable cv; // used to signal waiting PPU thread std::array, 256> threads; // SPU Threads - std::array, 256> images; // SPU Images - std::array args; // SPU Thread Arguments + std::array, 256> imgs; // SPU Images + std::array, 256> args; // SPU Thread Arguments std::weak_ptr ep_run; // port for SYS_SPU_THREAD_GROUP_EVENT_RUN events std::weak_ptr ep_exception; // TODO: SYS_SPU_THREAD_GROUP_EVENT_EXCEPTION @@ -204,15 +200,11 @@ struct lv2_spu_group class ppu_thread; -// Aux -void LoadSpuImage(const fs::file& stream, u32& spu_ep, u32 addr); -u32 LoadSpuImage(const fs::file& stream, u32& spu_ep); - // Syscalls error_code sys_spu_initialize(u32 max_usable_spu, u32 max_raw_spu); -error_code sys_spu_image_open(vm::ps3::ptr img, vm::ps3::cptr path); -error_code sys_spu_thread_initialize(vm::ps3::ptr thread, u32 group, u32 spu_num, vm::ps3::ptr, vm::ps3::ptr, vm::ps3::ptr); +error_code sys_spu_image_open(vm::ps3::ptr img, vm::ps3::cptr path); +error_code sys_spu_thread_initialize(vm::ps3::ptr thread, u32 group, u32 spu_num, vm::ps3::ptr, vm::ps3::ptr, vm::ps3::ptr); error_code sys_spu_thread_set_argument(u32 id, vm::ps3::ptr arg); error_code sys_spu_thread_group_create(vm::ps3::ptr id, u32 num, s32 prio, vm::ps3::ptr attr); error_code sys_spu_thread_group_destroy(u32 id);