diff --git a/TODO.md b/TODO.md index 35fc81ae9..76aee0383 100644 --- a/TODO.md +++ b/TODO.md @@ -43,61 +43,23 @@ indicate expected values. ## Codegen -### Branch generation - -Change style to match: http://llvm.org/docs/tutorial/LangImpl5.html -Insert check code, then push_back the branch block and implicit else after -its generated. This ensures ordering stays legit. - -### Stack variables - -Use stack variables for registers. - -- All allocas should go in the entry block. - - Lazily add or just add all registers/etc at the head. - - Must be 1 el, int64 -- Reuse through function. -- On FlushRegisters write back to state. -- FlushRegisters on indirect branch or call. - -``` -/// CreateEntryBlockAlloca - Create an alloca instruction in the entry block of -/// the function. This is used for mutable variables etc. -static AllocaInst *CreateEntryBlockAlloca(Function *TheFunction, - const std::string &VarName) { - IRBuilder<> TmpB(&TheFunction->getEntryBlock(), - TheFunction->getEntryBlock().begin()); - return TmpB.CreateAlloca(Type::getDoubleTy(getGlobalContext()), 0, - VarName.c_str()); -} -// stash result of above and reuse -// on first use in entry get the value from state? - -// Promote allocas to registers. -OurFPM.add(createPromoteMemoryToRegisterPass()); -// Do simple "peephole" optimizations and bit-twiddling optzns. -OurFPM.add(createInstructionCombiningPass()); -// Reassociate expressions. -OurFPM.add(createReassociatePass()); -``` - -### Tracing - -- Trace kernel export info (missing/present/etc). -- Trace user call info (name/?). -- Trace instruction info (disasm). - ### Calling convention Experiment with fastcc? May need typedef fn ptrs to call into the JITted code. -nonlazybind fn attribute to prevent lazy binding (slow down startup) +### Function calling convention analysis + +Track functions to see if they follow the standard calling convention. +This could use the hints from the EH data in the XEX. Looking specifically for +stack prolog/epilog and branches to LR. + +Benefits: +- Optimized prolog/epilog generation. +- Local variables for stack storage (alloca/etc) instead of user memory. +- Better return detection and fast returns. ### Indirect branches (ctr/lr) -emit_control.cc XeEmitBranchTo -Need to take the value in LR/CTR and do something with it. - Return path: - In SDB see if the function follows the 'return' semantic: - mfspr LR / mtspr LR/CTR / bcctr -- at end? @@ -118,32 +80,6 @@ Slow path: - Call out and do an SDB lookup. - If found, return, add to lookup table, and jump. - If not found, need new function codegen! - - -If the indirect br looks like it may be local (no stack setup/etc?) then -build a jump table: - -``` -Branch register with no link: -switch i32 %nia, label %non_local [ i32 0x..., label %loc_... - i32 0x..., label %loc_... - i32 0x..., label %loc_... ] -%non_local: going outside of the function - -Could put one of these tables at the bottom of each function and share -it. -This could be done via indirectbr if branchaddress is used to stash the -address. The address must be within the function, though. - -Branch register with link: -check, never local? -``` - -### Caching of register values in basic blocks - -Right now the SSA values seem to leak from the blocks somehow. All caching -is disabled. - ``` ## Debugging diff --git a/include/xenia/cpu/codegen/function_generator.h b/include/xenia/cpu/codegen/function_generator.h index 5ca7d192a..e1bf90d57 100644 --- a/include/xenia/cpu/codegen/function_generator.h +++ b/include/xenia/cpu/codegen/function_generator.h @@ -44,16 +44,23 @@ public: void GenerateBasicBlocks(); llvm::BasicBlock* GetBasicBlock(uint32_t address); llvm::BasicBlock* GetNextBasicBlock(); + llvm::BasicBlock* GetReturnBasicBlock(); llvm::Function* GetFunction(sdb::FunctionSymbol* fn); + int GenerateIndirectionBranch(uint32_t cia, llvm::Value* target, + bool lk, bool likely_local); + llvm::Value* LoadStateValue(uint32_t offset, llvm::Type* type, const char* name = ""); void StoreStateValue(uint32_t offset, llvm::Type* type, llvm::Value* value); llvm::Value* cia_value(); - void FlushRegisters(); + llvm::Value* SetupRegisterLocal(uint32_t offset, llvm::Type* type, + const char* name); + void FillRegisters(); + void SpillRegisters(); llvm::Value* xer_value(); void update_xer_value(llvm::Value* value); @@ -74,6 +81,7 @@ public: void WriteMemory(llvm::Value* addr, uint32_t size, llvm::Value* value); private: + void GenerateSharedBlocks(); void GenerateBasicBlock(sdb::FunctionBlock* block, llvm::BasicBlock* bb); xe_memory_ref memory_; @@ -83,6 +91,9 @@ private: llvm::Module* gen_module_; llvm::Function* gen_fn_; sdb::FunctionBlock* fn_block_; + llvm::BasicBlock* return_block_; + llvm::BasicBlock* internal_indirection_block_; + llvm::BasicBlock* external_indirection_block_; llvm::BasicBlock* bb_; llvm::IRBuilder<>* builder_; @@ -92,19 +103,15 @@ private: uint32_t cia_; struct { + llvm::Value* indirection_target; + llvm::Value* indirection_cia; + llvm::Value* xer; - bool xer_dirty; llvm::Value* lr; - bool lr_dirty; llvm::Value* ctr; - bool ctr_dirty; - llvm::Value* cr; - bool cr_dirty; - llvm::Value* gpr[32]; - uint32_t gpr_dirty_bits; - } values_; + } locals_; }; diff --git a/include/xenia/cpu/ppc/state.h b/include/xenia/cpu/ppc/state.h index ddea042d5..a559fdfe9 100644 --- a/include/xenia/cpu/ppc/state.h +++ b/include/xenia/cpu/ppc/state.h @@ -40,6 +40,10 @@ // } // FPRF +#define kXEPPCRegLR 0xFFFF0001 +#define kXEPPCRegCTR 0xFFFF0002 + + typedef struct XECACHEALIGN xe_float4 { union { struct { diff --git a/private/runtest.sh b/private/runtest.sh index d00de222e..95b9dbaea 100755 --- a/private/runtest.sh +++ b/private/runtest.sh @@ -1,16 +1,15 @@ python xenia-build.py xethunk python xenia-build.py build -rm build/$1* - ./build/xenia/release/xenia-run \ private/$1 \ --optimize_ir_modules=true \ --optimize_ir_functions=false \ --trace_kernel_calls=true \ - --trace_user_calls=true \ + --trace_user_calls=false \ --trace_instructions=false \ - 2>build/run.llvm.txt 1>build/run.txt + 1>build/run.txt + #2>build/run.llvm.txt \ if [ ! -s build/run.llvm.txt ]; then rm build/run.llvm.txt diff --git a/src/cpu/codegen/emit_control.cc b/src/cpu/codegen/emit_control.cc index fa43f5438..fa72a6eed 100644 --- a/src/cpu/codegen/emit_control.cc +++ b/src/cpu/codegen/emit_control.cc @@ -10,6 +10,7 @@ #include "cpu/codegen/emit.h" #include +#include using namespace llvm; @@ -23,8 +24,51 @@ namespace cpu { namespace codegen { -int XeEmitBranchTo(FunctionGenerator& g, IRBuilder<>& b, const char* src, - uint32_t cia, bool lk) { +int XeEmitIndirectBranchTo( + FunctionGenerator& g, IRBuilder<>& b, const char* src, uint32_t cia, + bool lk, uint32_t reg) { + // TODO(benvanik): run a DFA pass to see if we can detect whether this is + // a normal function return that is pulling the LR from the stack that + // it set in the prolog. If so, we can omit the dynamic check! + + // NOTE: we avoid spilling registers until we know that the target is not + // a basic block within this function. + + Value* target; + switch (reg) { + case kXEPPCRegLR: + target = g.lr_value(); + break; + case kXEPPCRegCTR: + target = g.ctr_value(); + break; + default: + XEASSERTALWAYS(); + return 1; + } + + // Dynamic test when branching to LR, which is usually used for the return. + // We only do this if LK=0 as returns wouldn't set LR. + // Ideally it's a return and we can just do a simple ret and be done. + // If it's not, we fall through to the full indirection logic. + if (!lk && reg == kXEPPCRegLR) { + BasicBlock* next_block = g.GetNextBasicBlock(); + BasicBlock* mismatch_bb = BasicBlock::Create(*g.context(), "lr_mismatch", + g.gen_fn(), next_block); + Value* lr_cmp = b.CreateICmpEQ(target, ++(g.gen_fn()->arg_begin())); + // The return block will spill registers for us. + b.CreateCondBr(lr_cmp, g.GetReturnBasicBlock(), mismatch_bb); + b.SetInsertPoint(mismatch_bb); + } + + // Defer to the generator, which will do fancy things. + bool likely_local = !lk && reg == kXEPPCRegCTR; + return g.GenerateIndirectionBranch(cia, target, lk, likely_local); +} + +int XeEmitBranchTo( + FunctionGenerator& g, IRBuilder<>& b, const char* src, uint32_t cia, + bool lk) { // Get the basic block and switch behavior based on outgoing type. FunctionBlock* fn_block = g.fn_block(); switch (fn_block->outgoing_type) { @@ -37,21 +81,24 @@ int XeEmitBranchTo(FunctionGenerator& g, IRBuilder<>& b, const char* src, } case FunctionBlock::kTargetFunction: { + // Spill all registers to memory. + // TODO(benvanik): only spill ones used by the target function? Use + // calling convention flags on the function to not spill temp + // registers? + g.SpillRegisters(); + Function* target_fn = g.GetFunction(fn_block->outgoing_function); Function::arg_iterator args = g.gen_fn()->arg_begin(); - Value* statePtr = args; - b.CreateCall(target_fn, statePtr); - if (!lk) { - // Tail. + Value* state_ptr = args; + b.CreateCall2(target_fn, state_ptr, b.getInt64(cia + 4)); + BasicBlock* next_bb = g.GetNextBasicBlock(); + if (!lk || !next_bb) { + // Tail. No need to refill the local register values, just return. b.CreateRetVoid(); } else { - BasicBlock* next_bb = g.GetNextBasicBlock(); - if (next_bb) { - b.CreateBr(next_bb); - } else { - // ? - b.CreateRetVoid(); - } + // Refill registers from state. + g.FillRegisters(); + b.CreateBr(next_bb); } break; } @@ -59,15 +106,13 @@ int XeEmitBranchTo(FunctionGenerator& g, IRBuilder<>& b, const char* src, { // An indirect jump. printf("INDIRECT JUMP VIA LR: %.8X\n", cia); - b.CreateRetVoid(); - break; + return XeEmitIndirectBranchTo(g, b, src, cia, lk, kXEPPCRegLR); } case FunctionBlock::kTargetCTR: { // An indirect jump. printf("INDIRECT JUMP VIA CTR: %.8X\n", cia); - b.CreateRetVoid(); - break; + return XeEmitIndirectBranchTo(g, b, src, cia, lk, kXEPPCRegCTR); } default: case FunctionBlock::kTargetNone: @@ -95,8 +140,6 @@ XEEMITTER(bx, 0x48000000, I )(FunctionGenerator& g, IRBuilder<>& b, I g.update_lr_value(b.getInt32(i.address + 4)); } - g.FlushRegisters(); - return XeEmitBranchTo(g, b, "bx", i.address, i.I.LK); } @@ -113,6 +156,10 @@ XEEMITTER(bcx, 0x40000000, B )(FunctionGenerator& g, IRBuilder<>& b, I // if LK then // LR <- CIA + 4 + // NOTE: the condition bits are reversed! + // 01234 (docs) + // 43210 (real) + // TODO(benvanik): this may be wrong and overwrite LRs when not desired! // The docs say always, though... if (i.B.LK) { @@ -120,7 +167,7 @@ XEEMITTER(bcx, 0x40000000, B )(FunctionGenerator& g, IRBuilder<>& b, I } Value* ctr_ok = NULL; - if (XESELECTBITS(i.B.BO, 4, 4)) { + if (XESELECTBITS(i.B.BO, 2, 2)) { // Ignore ctr. } else { // Decrement counter. @@ -129,7 +176,7 @@ XEEMITTER(bcx, 0x40000000, B )(FunctionGenerator& g, IRBuilder<>& b, I ctr = b.CreateSub(ctr, b.getInt64(1)); // Ctr check. - if (XESELECTBITS(i.B.BO, 3, 3)) { + if (XESELECTBITS(i.B.BO, 1, 1)) { ctr_ok = b.CreateICmpEQ(ctr, b.getInt64(0)); } else { ctr_ok = b.CreateICmpNE(ctr, b.getInt64(0)); @@ -159,7 +206,6 @@ XEEMITTER(bcx, 0x40000000, B )(FunctionGenerator& g, IRBuilder<>& b, I ok = cond_ok; } - g.FlushRegisters(); // Handle unconditional branches without extra fluff. BasicBlock* original_bb = b.GetInsertBlock(); if (ok) { @@ -196,6 +242,10 @@ XEEMITTER(bcctrx, 0x4C000420, XL )(FunctionGenerator& g, IRBuilder<>& b, I // if LK then // LR <- CIA + 4 + // NOTE: the condition bits are reversed! + // 01234 (docs) + // 43210 (real) + // TODO(benvanik): this may be wrong and overwrite LRs when not desired! // The docs say always, though... if (i.XL.LK) { @@ -221,8 +271,6 @@ XEEMITTER(bcctrx, 0x4C000420, XL )(FunctionGenerator& g, IRBuilder<>& b, I ok = cond_ok; } - g.FlushRegisters(); - // Handle unconditional branches without extra fluff. BasicBlock* original_bb = b.GetInsertBlock(); if (ok) { @@ -257,6 +305,10 @@ XEEMITTER(bclrx, 0x4C000020, XL )(FunctionGenerator& g, IRBuilder<>& b, I // if LK then // LR <- CIA + 4 + // NOTE: the condition bits are reversed! + // 01234 (docs) + // 43210 (real) + // TODO(benvanik): this may be wrong and overwrite LRs when not desired! // The docs say always, though... if (i.XL.LK) { @@ -264,7 +316,7 @@ XEEMITTER(bclrx, 0x4C000020, XL )(FunctionGenerator& g, IRBuilder<>& b, I } Value* ctr_ok = NULL; - if (XESELECTBITS(i.XL.BO, 4, 4)) { + if (XESELECTBITS(i.XL.BO, 2, 2)) { // Ignore ctr. } else { // Decrement counter. @@ -273,7 +325,7 @@ XEEMITTER(bclrx, 0x4C000020, XL )(FunctionGenerator& g, IRBuilder<>& b, I ctr = b.CreateSub(ctr, b.getInt64(1)); // Ctr check. - if (XESELECTBITS(i.XL.BO, 3, 3)) { + if (XESELECTBITS(i.XL.BO, 1, 1)) { ctr_ok = b.CreateICmpEQ(ctr, b.getInt64(0)); } else { ctr_ok = b.CreateICmpNE(ctr, b.getInt64(0)); @@ -303,8 +355,6 @@ XEEMITTER(bclrx, 0x4C000020, XL )(FunctionGenerator& g, IRBuilder<>& b, I ok = cond_ok; } - g.FlushRegisters(); - // Handle unconditional branches without extra fluff. BasicBlock* original_bb = b.GetInsertBlock(); if (ok) { diff --git a/src/cpu/codegen/function_generator.cc b/src/cpu/codegen/function_generator.cc index 162067271..98fd4d8ad 100644 --- a/src/cpu/codegen/function_generator.cc +++ b/src/cpu/codegen/function_generator.cc @@ -49,8 +49,22 @@ FunctionGenerator::FunctionGenerator( gen_module_ = gen_module; gen_fn_ = gen_fn; builder_ = new IRBuilder<>(*context_); + fn_block_ = NULL; + return_block_ = NULL; + internal_indirection_block_ = NULL; + external_indirection_block_ = NULL; + bb_ = NULL; - xe_zero_struct(&values_, sizeof(values_)); + locals_.indirection_target = NULL; + locals_.indirection_cia = NULL; + + locals_.xer = NULL; + locals_.lr = NULL; + locals_.ctr = NULL; + locals_.cr = NULL; + for (size_t n = 0; n < XECOUNT(locals_.gpr); n++) { + locals_.gpr[n] = NULL; + } } FunctionGenerator::~FunctionGenerator() { @@ -87,12 +101,13 @@ void FunctionGenerator::GenerateBasicBlocks() { builder_->SetInsertPoint(entry); if (FLAGS_trace_user_calls) { + SpillRegisters(); Value* traceUserCall = gen_module_->getGlobalVariable("XeTraceUserCall"); builder_->CreateCall3( traceUserCall, gen_fn_->arg_begin(), - builder_->getInt32(fn_->start_address), - builder_->getInt32(0)); + builder_->getInt64(fn_->start_address), + ++gen_fn_->arg_begin()); } // If this function is empty, abort! @@ -101,6 +116,11 @@ void FunctionGenerator::GenerateBasicBlocks() { return; } + // Create a return block. + // This spills registers and returns. All non-tail returns should branch + // here to do the return and ensure registers are spilled. + return_block_ = BasicBlock::Create(*context_, "return", gen_fn_); + // Pass 1 creates all of the blocks - this way we can branch to them. for (std::map::iterator it = fn_->blocks.begin(); it != fn_->blocks.end(); ++it) { @@ -122,6 +142,50 @@ void FunctionGenerator::GenerateBasicBlocks() { FunctionBlock* block = it->second; GenerateBasicBlock(block, GetBasicBlock(block->start_address)); } + + // Setup the shared return/indirection/etc blocks now that we know all the + // blocks we need and all the registers used. + GenerateSharedBlocks(); +} + +void FunctionGenerator::GenerateSharedBlocks() { + IRBuilder<>& b = *builder_; + + Value* indirect_branch = gen_module_->getGlobalVariable("XeIndirectBranch"); + + // Setup the spill block in return. + b.SetInsertPoint(return_block_); + SpillRegisters(); + b.CreateRetVoid(); + + // Build indirection block on demand. + // We have already prepped all basic blocks, so we can build these tables now. + if (external_indirection_block_) { + // This will spill registers and call the external function. + // It is only meant for LK=0. + b.SetInsertPoint(external_indirection_block_); + SpillRegisters(); + b.CreateCall3(indirect_branch, + gen_fn_->arg_begin(), + b.CreateLoad(locals_.indirection_target), + b.CreateLoad(locals_.indirection_cia)); + b.CreateRetVoid(); + } + + if (internal_indirection_block_) { + // This will not spill registers and instead try to switch on local blocks. + // If it fails then the external indirection path is taken. + // NOTE: we only generate this if a likely local branch is taken. + b.SetInsertPoint(internal_indirection_block_); + SwitchInst* switch_i = b.CreateSwitch( + b.CreateLoad(locals_.indirection_target), + external_indirection_block_, + bbs_.size()); + for (std::map::iterator it = bbs_.begin(); + it != bbs_.end(); ++it) { + switch_i->addCase(b.getInt64(it->first), it->second); + } + } } void FunctionGenerator::GenerateBasicBlock(FunctionBlock* block, @@ -147,6 +211,7 @@ void FunctionGenerator::GenerateBasicBlock(FunctionBlock* block, i.type = ppc::GetInstrType(i.code); if (FLAGS_trace_instructions) { + SpillRegisters(); builder_->CreateCall3( traceInstruction, gen_fn_->arg_begin(), @@ -176,10 +241,6 @@ void FunctionGenerator::GenerateBasicBlock(FunctionBlock* block, // If we fall through, create the branch. if (block->outgoing_type == FunctionBlock::kTargetNone) { - // Flush registers. - // TODO(benvanik): only do this before jumps out. - FlushRegisters(); - BasicBlock* next_bb = GetNextBasicBlock(); XEASSERTNOTNULL(next_bb); builder_->CreateBr(next_bb); @@ -212,6 +273,10 @@ BasicBlock* FunctionGenerator::GetNextBasicBlock() { return NULL; } +BasicBlock* FunctionGenerator::GetReturnBasicBlock() { + return return_block_; +} + Function* FunctionGenerator::GetFunction(FunctionSymbol* fn) { Function* result = gen_module_->getFunction(StringRef(fn->name)); if (!result) { @@ -221,6 +286,94 @@ Function* FunctionGenerator::GetFunction(FunctionSymbol* fn) { return result; } +int FunctionGenerator::GenerateIndirectionBranch(uint32_t cia, Value* target, + bool lk, bool likely_local) { + // This function is called by the control emitters when they know that an + // indirect branch is required. + // It first tries to see if the branch is to an address within the function + // and, if so, uses a local switch table. If that fails because we don't know + // the block the function is regenerated (ACK!). If the target is external + // then an external call occurs. + + IRBuilder<>& b = *builder_; + BasicBlock* next_block = GetNextBasicBlock(); + + BasicBlock* insert_bb = b.GetInsertBlock(); + BasicBlock::iterator insert_bbi = b.GetInsertPoint(); + + // Request builds of the indirection blocks on demand. + // We can't build here because we don't know what registers will be needed + // yet, so we just create the blocks and let GenerateSharedBlocks handle it + // after we are done with all user instructions. + if (!external_indirection_block_) { + // Setup locals in the entry block. + builder_->SetInsertPoint(&gen_fn_->getEntryBlock(), + gen_fn_->getEntryBlock().begin()); + locals_.indirection_target = b.CreateAlloca( + b.getInt64Ty(), 0, "indirection_target"); + locals_.indirection_cia = b.CreateAlloca( + b.getInt64Ty(), 0, "indirection_cia"); + + external_indirection_block_ = BasicBlock::Create( + *context_, "external_indirection_block", gen_fn_, return_block_); + } + if (likely_local && !internal_indirection_block_) { + internal_indirection_block_ = BasicBlock::Create( + *context_, "internal_indirection_block", gen_fn_, return_block_); + } + + b.SetInsertPoint(insert_bb, insert_bbi); + + // Check to see if the target address is within the function. + // If it is jump to that basic block. If the basic block is not found it means + // we have a jump inside the function that wasn't identified via static + // analysis. These are bad as they require function regeneration. + if (likely_local) { + // Note that we only support LK=0, as we are using shared tables. + XEASSERT(!lk); + b.CreateStore(target, locals_.indirection_target); + b.CreateStore(b.getInt64(cia), locals_.indirection_cia); + Value* fn_ge_cmp = b.CreateICmpUGE(target, b.getInt64(fn_->start_address)); + Value* fn_l_cmp = b.CreateICmpULT(target, b.getInt64(fn_->end_address)); + Value* fn_target_cmp = b.CreateAnd(fn_ge_cmp, fn_l_cmp); + b.CreateCondBr(fn_target_cmp, + internal_indirection_block_, external_indirection_block_); + return 0; + } + + // If we are LK=0 jump to the shared indirection block. This prevents us + // from needing to fill the registers again after the call and shares more + // code. + if (!lk) { + b.CreateStore(target, locals_.indirection_target); + b.CreateStore(b.getInt64(cia), locals_.indirection_cia); + b.CreateBr(external_indirection_block_); + } else { + // Slowest path - spill, call the external function, and fill. + // We should avoid this at all costs. + + // Spill registers. We could probably share this. + SpillRegisters(); + + // TODO(benvanik): keep function pointer lookup local. + Value* indirect_branch = gen_module_->getGlobalVariable("XeIndirectBranch"); + b.CreateCall3(indirect_branch, + gen_fn_->arg_begin(), + target, + b.getInt64(cia)); + + if (next_block) { + // Only refill if not a tail call. + FillRegisters(); + b.CreateBr(next_block); + } else { + b.CreateRetVoid(); + } + } + + return 0; +} + Value* FunctionGenerator::LoadStateValue(uint32_t offset, Type* type, const char* name) { PointerType* pointerTy = PointerType::getUnqual(type); @@ -240,12 +393,6 @@ void FunctionGenerator::StoreStateValue(uint32_t offset, Type* type, Value* address = builder_->CreateConstInBoundsGEP1_64( statePtr, offset); Value* ptr = builder_->CreatePointerCast(address, pointerTy); - - // Widen to target type if needed. - if (!value->getType()->isIntegerTy(type->getIntegerBitWidth())) { - value = builder_->CreateZExt(value, type); - } - builder_->CreateStore(value, ptr); } @@ -253,184 +400,225 @@ Value* FunctionGenerator::cia_value() { return builder_->getInt32(cia_); } -void FunctionGenerator::FlushRegisters() { +Value* FunctionGenerator::SetupRegisterLocal(uint32_t offset, llvm::Type* type, + const char* name) { + // Insert into the entry block. + BasicBlock* insert_bb = builder_->GetInsertBlock(); + BasicBlock::iterator insert_bbi = builder_->GetInsertPoint(); + builder_->SetInsertPoint(&gen_fn_->getEntryBlock(), + gen_fn_->getEntryBlock().begin()); + + Value* v = builder_->CreateAlloca(type, 0, name); + builder_->CreateStore(LoadStateValue(offset, type), v); + + builder_->SetInsertPoint(insert_bb, insert_bbi); + return v; +} + +void FunctionGenerator::FillRegisters() { + // This updates all of the local register values from the state memory. + // It should be called on function entry for initial setup and after any + // calls that may modify the registers. + + if (locals_.xer) { + builder_->CreateStore(LoadStateValue( + offsetof(xe_ppc_state_t, xer), + builder_->getInt64Ty()), locals_.xer); + } + + if (locals_.lr) { + builder_->CreateStore(LoadStateValue( + offsetof(xe_ppc_state_t, lr), + builder_->getInt64Ty()), locals_.lr); + } + + if (locals_.ctr) { + builder_->CreateStore(LoadStateValue( + offsetof(xe_ppc_state_t, ctr), + builder_->getInt64Ty()), locals_.ctr); + } + + if (locals_.cr) { + builder_->CreateStore(LoadStateValue( + offsetof(xe_ppc_state_t, cr), + builder_->getInt64Ty()), locals_.cr); + } + + // Note that we skip zero. + for (uint32_t n = 1; n < XECOUNT(locals_.gpr); n++) { + if (locals_.gpr[n]) { + builder_->CreateStore(LoadStateValue( + offsetof(xe_ppc_state_t, r) + 8 * n, + builder_->getInt64Ty()), locals_.gpr[n]); + } + } +} + +void FunctionGenerator::SpillRegisters() { // This flushes all local registers (if written) to the register bank and // resets their values. // // TODO(benvanik): only flush if actually required, or selective flushes. - // xer + if (locals_.xer) { + StoreStateValue( + offsetof(xe_ppc_state_t, xer), + builder_->getInt64Ty(), + builder_->CreateLoad(locals_.xer)); + } - if (values_.lr && values_.lr_dirty) { + if (locals_.lr) { StoreStateValue( offsetof(xe_ppc_state_t, lr), builder_->getInt64Ty(), - values_.lr); - values_.lr = NULL; - values_.lr_dirty = false; + builder_->CreateLoad(locals_.lr)); } - if (values_.ctr && values_.ctr_dirty) { + if (locals_.ctr) { StoreStateValue( offsetof(xe_ppc_state_t, ctr), builder_->getInt64Ty(), - values_.ctr); - values_.ctr = NULL; - values_.ctr_dirty = false; + builder_->CreateLoad(locals_.ctr)); } // TODO(benvanik): don't flush across calls? - if (values_.cr && values_.cr_dirty) { + if (locals_.cr) { StoreStateValue( offsetof(xe_ppc_state_t, cr), builder_->getInt64Ty(), - values_.cr); - values_.cr = NULL; - values_.cr_dirty = false; + builder_->CreateLoad(locals_.cr)); } - for (uint32_t n = 0; n < XECOUNT(values_.gpr); n++) { - Value* v = values_.gpr[n]; - if (v && (values_.gpr_dirty_bits & (1 << n))) { + // Note that we skip zero. + for (uint32_t n = 1; n < XECOUNT(locals_.gpr); n++) { + Value* v = locals_.gpr[n]; + if (v) { StoreStateValue( offsetof(xe_ppc_state_t, r) + 8 * n, builder_->getInt64Ty(), - values_.gpr[n]); - values_.gpr[n] = NULL; + builder_->CreateLoad(locals_.gpr[n])); } } - values_.gpr_dirty_bits = 0; } Value* FunctionGenerator::xer_value() { - if (true) {//!values_.xer) { - // Fetch from register bank. - Value* v = LoadStateValue( + if (!locals_.xer) { + locals_.xer = SetupRegisterLocal( offsetof(xe_ppc_state_t, xer), builder_->getInt64Ty(), - "xer_"); - values_.xer = v; - return v; - } else { - // Return local. - return values_.xer; + "xer"); } + return locals_.xer; } void FunctionGenerator::update_xer_value(Value* value) { - // Widen to 64bits if needed. + // Ensure the register is local. + xer_value(); + + // Extend to 64bits if needed. if (!value->getType()->isIntegerTy(64)) { value = builder_->CreateZExt(value, builder_->getInt64Ty()); } - - values_.xer = value; - values_.xer_dirty = true; + builder_->CreateStore(value, locals_.xer); } Value* FunctionGenerator::lr_value() { - if (true) {//!values_.lr) { - // Fetch from register bank. - Value* v = LoadStateValue( + if (!locals_.lr) { + locals_.lr = SetupRegisterLocal( offsetof(xe_ppc_state_t, lr), builder_->getInt64Ty(), - "lr_"); - values_.lr = v; - return v; - } else { - // Return local. - return values_.lr; + "lr"); } + return builder_->CreateLoad(locals_.lr); } void FunctionGenerator::update_lr_value(Value* value) { - // Widen to 64bits if needed. + // Ensure the register is local. + lr_value(); + + // Extend to 64bits if needed. if (!value->getType()->isIntegerTy(64)) { value = builder_->CreateZExt(value, builder_->getInt64Ty()); } - - values_.lr = value; - values_.lr_dirty = true; + builder_->CreateStore(value, locals_.lr); } Value* FunctionGenerator::ctr_value() { - if (true) {//!values_.ctr) { - // Fetch from register bank. - Value* v = LoadStateValue( + if (!locals_.ctr) { + locals_.ctr = SetupRegisterLocal( offsetof(xe_ppc_state_t, ctr), builder_->getInt64Ty(), - "ctr_"); - values_.ctr = v; - return v; - } else { - // Return local. - return values_.ctr; + "ctr"); } + return builder_->CreateLoad(locals_.ctr); } void FunctionGenerator::update_ctr_value(Value* value) { - // Widen to 64bits if needed. + // Ensure the register is local. + ctr_value(); + + // Extend to 64bits if needed. if (!value->getType()->isIntegerTy(64)) { value = builder_->CreateZExt(value, builder_->getInt64Ty()); } - - values_.ctr = value; - values_.ctr_dirty = true; + builder_->CreateStore(value, locals_.ctr); } Value* FunctionGenerator::cr_value() { - if (true) {//!values_.cr) { - // Fetch from register bank. - Value* v = LoadStateValue( + if (!locals_.cr) { + locals_.cr = SetupRegisterLocal( offsetof(xe_ppc_state_t, cr), builder_->getInt64Ty(), - "cr_"); - values_.cr = v; - return v; - } else { - // Return local. - return values_.cr; + "cr"); } + return builder_->CreateLoad(locals_.cr); } void FunctionGenerator::update_cr_value(Value* value) { - values_.cr = value; - values_.cr_dirty = true; + // Ensure the register is local. + cr_value(); + + // Extend to 64bits if needed. + if (!value->getType()->isIntegerTy(64)) { + value = builder_->CreateZExt(value, builder_->getInt64Ty()); + } + builder_->CreateStore(value, locals_.cr); } Value* FunctionGenerator::gpr_value(uint32_t n) { + XEASSERT(n >= 0 && n < 32); if (n == 0) { // Always force zero to a constant - this should help LLVM. return builder_->getInt64(0); } - if (true) {//!values_.gpr[n]) { - // Need to fetch from register bank. + if (!locals_.gpr[n]) { char name[30]; - xesnprintfa(name, XECOUNT(name), "gpr_r%d_", n); - Value* v = LoadStateValue( + xesnprintfa(name, XECOUNT(name), "gpr_r%d", n); + locals_.gpr[n] = SetupRegisterLocal( offsetof(xe_ppc_state_t, r) + 8 * n, builder_->getInt64Ty(), name); - values_.gpr[n] = v; - return v; - } else { - // Local value, reuse. - return values_.gpr[n]; } + return builder_->CreateLoad(locals_.gpr[n]); } void FunctionGenerator::update_gpr_value(uint32_t n, Value* value) { + XEASSERT(n >= 0 && n < 32); + if (n == 0) { // Ignore writes to zero. return; } - // Widen to 64bits if needed. + // Ensure the register is local. + gpr_value(n); + + // Extend to 64bits if needed. if (!value->getType()->isIntegerTy(64)) { value = builder_->CreateZExt(value, builder_->getInt64Ty()); } - values_.gpr[n] = value; - values_.gpr_dirty_bits |= 1 << n; + builder_->CreateStore(value, locals_.gpr[n]); } Value* FunctionGenerator::GetMembase() { diff --git a/src/cpu/codegen/module_generator.cc b/src/cpu/codegen/module_generator.cc index 71c6efbca..8f2244c17 100644 --- a/src/cpu/codegen/module_generator.cc +++ b/src/cpu/codegen/module_generator.cc @@ -138,6 +138,7 @@ Function* ModuleGenerator::CreateFunctionDefinition(const char* name) { std::vector args; args.push_back(PointerType::getUnqual(Type::getInt8Ty(context))); + args.push_back(Type::getInt64Ty(context)); Type* return_type = Type::getVoidTy(context); FunctionType* ft = FunctionType::get(return_type, @@ -159,11 +160,16 @@ Function* ModuleGenerator::CreateFunctionDefinition(const char* name) { Value* fn_arg = fn_args++; fn_arg->setName("state"); f->setDoesNotAlias(1); + f->setDoesNotCapture(1); // 'state' should try to be in a register, if possible. // TODO(benvanik): verify that's a good idea. // f->getArgumentList().begin()->addAttr( // Attribute::get(context, AttrBuilder().addAttribute(Attribute::InReg))); + // 'lr' + fn_arg = fn_args++; + fn_arg->setName("lr"); + return f; }; @@ -183,8 +189,8 @@ void ModuleGenerator::AddMissingImport(FunctionSymbol* fn) { builder.CreateCall3( traceKernelCall, f->arg_begin(), - builder.getInt32(fn->start_address), - builder.getInt32(0)); + builder.getInt64(fn->start_address), + ++f->arg_begin()); } builder.CreateRetVoid(); diff --git a/src/cpu/exec_module.cc b/src/cpu/exec_module.cc index a869a9428..01170e22e 100644 --- a/src/cpu/exec_module.cc +++ b/src/cpu/exec_module.cc @@ -202,14 +202,19 @@ XECLEANUP: return result_code; } -void XeTraceKernelCall(xe_ppc_state_t* state, uint32_t cia, uint32_t call_ia) { - // TODO(benvanik): get names - XELOGCPU("TRACE: %.8X -> k.%.8X", call_ia, cia); +void XeIndirectBranch(xe_ppc_state_t* state, uint64_t target, uint64_t br_ia) { + printf("INDIRECT BRANCH %.8X -> %.8X\n", (uint32_t)br_ia, (uint32_t)target); + XEASSERTALWAYS(); } -void XeTraceUserCall(xe_ppc_state_t* state, uint32_t cia, uint32_t call_ia) { +void XeTraceKernelCall(xe_ppc_state_t* state, uint64_t cia, uint64_t call_ia) { // TODO(benvanik): get names - XELOGCPU("TRACE: %.8X -> u.%.8X", call_ia, cia); + XELOGCPU("TRACE: %.8X -> k.%.8X", (uint32_t)call_ia, (uint32_t)cia); +} + +void XeTraceUserCall(xe_ppc_state_t* state, uint64_t cia, uint64_t call_ia) { + // TODO(benvanik): get names + XELOGCPU("TRACE: %.8X -> u.%.8X", (uint32_t)call_ia, (uint32_t)cia); } void XeTraceInstruction(xe_ppc_state_t* state, uint32_t cia, uint32_t data) { @@ -244,11 +249,23 @@ int ExecModule::InjectGlobals() { ConstantInt::get(intPtrTy, (uintptr_t)xe_memory_addr(memory_, 0)), int8PtrTy)); + // Control methods: + std::vector indirectBranchArgs; + indirectBranchArgs.push_back(int8PtrTy); + indirectBranchArgs.push_back(Type::getInt64Ty(context)); + indirectBranchArgs.push_back(Type::getInt64Ty(context)); + FunctionType* indirectBranchTy = FunctionType::get( + Type::getVoidTy(context), indirectBranchArgs, false); + gv = new GlobalVariable(*gen_module_, indirectBranchTy, true, + GlobalVariable::ExternalLinkage, 0, + "XeIndirectBranch"); + engine_->addGlobalMapping(gv, (void*)&XeIndirectBranch); + // Tracing methods: std::vector traceCallArgs; traceCallArgs.push_back(int8PtrTy); - traceCallArgs.push_back(Type::getInt32Ty(context)); - traceCallArgs.push_back(Type::getInt32Ty(context)); + traceCallArgs.push_back(Type::getInt64Ty(context)); + traceCallArgs.push_back(Type::getInt64Ty(context)); FunctionType* traceCallTy = FunctionType::get( Type::getVoidTy(context), traceCallArgs, false); std::vector traceInstructionArgs; diff --git a/src/cpu/sdb.cc b/src/cpu/sdb.cc index 629c9a02a..57eef2ab9 100644 --- a/src/cpu/sdb.cc +++ b/src/cpu/sdb.cc @@ -588,6 +588,20 @@ int SymbolDatabase::AnalyzeFunction(FunctionSymbol* fn) { ends_fn = true; } ends_block = true; + } else if (i.code == 0x4E800420) { + // bctr -- unconditional branch to CTR. + // This is generally a jump to a function pointer (non-return). + block->outgoing_type = FunctionBlock::kTargetCTR; + if (furthest_target > addr) { + // Remaining targets within function, not end. + XELOGSDB("ignoring bctr %.8X (branch to %.8X)\n", addr, + furthest_target); + } else { + // Function end point. + XELOGSDB("function end %.8X\n", addr); + ends_fn = true; + } + ends_block = true; } else if (i.type->opcode == 0x48000000) { // b/ba/bl/bla uint32_t target = XEEXTS26(i.I.LI << 2) + (i.I.AA ? 0 : (int32_t)addr);