From ac473eb400acd9c6730ae8954bcd2cd4473cb005 Mon Sep 17 00:00:00 2001 From: Nekotekina Date: Fri, 19 Apr 2019 14:28:27 +0300 Subject: [PATCH] Rewrite cpu_translator::rol, add fshl and fshr Use new funnel shift intrinsics --- rpcs3/Emu/CPU/CPUTranslator.h | 134 +++++++++++++++++++++++++++++-- rpcs3/Emu/Cell/PPUTranslator.cpp | 12 +-- rpcs3/Emu/Cell/SPURecompiler.cpp | 36 ++++----- 3 files changed, 153 insertions(+), 29 deletions(-) diff --git a/rpcs3/Emu/CPU/CPUTranslator.h b/rpcs3/Emu/CPU/CPUTranslator.h index bc6fcbb233..bf63c2841b 100644 --- a/rpcs3/Emu/CPU/CPUTranslator.h +++ b/rpcs3/Emu/CPU/CPUTranslator.h @@ -667,6 +667,120 @@ inline llvm_shr::type>> operator >> return {a1, {c}}; } +template > +struct llvm_fshl +{ + using type = T; + + llvm_expr_t a1; + llvm_expr_t a2; + llvm_expr_t a3; + + static_assert(llvm_value_t::is_sint || llvm_value_t::is_uint, "llvm_fshl<>: invalid type"); + + static constexpr bool is_ok = llvm_value_t::is_sint || llvm_value_t::is_uint; + + static llvm::Function* get_fshl(llvm::IRBuilder<>* ir) + { + const auto module = ir->GetInsertBlock()->getParent()->getParent(); + return llvm::Intrinsic::getDeclaration(module, llvm::Intrinsic::fshl, {llvm_value_t::get_type(ir->getContext())}); + } + + static llvm::Value* fold(llvm::IRBuilder<>* ir, llvm::Value* v1, llvm::Value* v2, llvm::Value* v3) + { + // Compute constant result: (v1 << n) | (v2 >> (size - n)) with n = v3 % size. The right shift is emitted as >> (size - 1 - n) followed by >> 1, so each individual shift amount stays below size even when n is 0.
+ const u64 size = v3->getType()->getScalarSizeInBits(); + const auto val = ir->CreateURem(v3, llvm::ConstantInt::get(v3->getType(), size)); + const auto shl = ir->CreateShl(v1, val); + const auto shr = ir->CreateLShr(v2, ir->CreateSub(llvm::ConstantInt::get(v3->getType(), size - 1), val)); + return ir->CreateOr(shl, ir->CreateLShr(shr, 1)); + } + + llvm::Value* eval(llvm::IRBuilder<>* ir) const + { + const auto v1 = a1.eval(ir); + const auto v2 = a2.eval(ir); + const auto v3 = a3.eval(ir); + + if (llvm::isa(v1) && llvm::isa(v2) && llvm::isa(v3)) + { + return fold(ir, v1, v2, v3); + } + + return ir->CreateCall(get_fshl(ir), {v1, v2, v3}); + } +}; + +template > +struct llvm_fshr +{ + using type = T; + + llvm_expr_t a1; + llvm_expr_t a2; + llvm_expr_t a3; + + static_assert(llvm_value_t::is_sint || llvm_value_t::is_uint, "llvm_fshr<>: invalid type"); + + static constexpr bool is_ok = llvm_value_t::is_sint || llvm_value_t::is_uint; + + static llvm::Function* get_fshr(llvm::IRBuilder<>* ir) + { + const auto module = ir->GetInsertBlock()->getParent()->getParent(); + return llvm::Intrinsic::getDeclaration(module, llvm::Intrinsic::fshr, {llvm_value_t::get_type(ir->getContext())}); + } + + static llvm::Value* fold(llvm::IRBuilder<>* ir, llvm::Value* v1, llvm::Value* v2, llvm::Value* v3) + { + // Compute constant result: (v2 >> n) | (v1 << (size - n)) with n = v3 % size. The left shift is emitted as << (size - 1 - n) followed by << 1, so each individual shift amount stays below size even when n is 0.
+ const u64 size = v3->getType()->getScalarSizeInBits(); + const auto val = ir->CreateURem(v3, llvm::ConstantInt::get(v3->getType(), size)); + const auto shr = ir->CreateLShr(v2, val); + const auto shl = ir->CreateShl(v1, ir->CreateSub(llvm::ConstantInt::get(v3->getType(), size - 1), val)); + return ir->CreateOr(shr, ir->CreateShl(shl, 1)); + } + + llvm::Value* eval(llvm::IRBuilder<>* ir) const + { + const auto v1 = a1.eval(ir); + const auto v2 = a2.eval(ir); + const auto v3 = a3.eval(ir); + + if (llvm::isa(v1) && llvm::isa(v2) && llvm::isa(v3)) + { + return fold(ir, v1, v2, v3); + } + + return ir->CreateCall(get_fshr(ir), {v1, v2, v3}); + } +}; + +template > +struct llvm_rol +{ + using type = T; + + llvm_expr_t a1; + llvm_expr_t a2; + + static_assert(llvm_value_t::is_sint || llvm_value_t::is_uint, "llvm_rol<>: invalid type"); + + static constexpr bool is_ok = llvm_value_t::is_sint || llvm_value_t::is_uint; + + llvm::Value* eval(llvm::IRBuilder<>* ir) const + { + const auto v1 = a1.eval(ir); + const auto v2 = a2.eval(ir); + + if (llvm::isa(v1) && llvm::isa(v2)) + { + return llvm_fshl::fold(ir, v1, v1, v2); + } + + return ir->CreateCall(llvm_fshl::get_fshl(ir), {v1, v1, v2}); + } +}; + template > struct llvm_and { @@ -1296,12 +1410,22 @@ public: return llvm_max{std::forward(a), std::forward(b)}; } - // Rotate left - template - static inline auto rol(T a, T b) + template ::is_ok>> + static auto fshl(T&& a, U&& b, V&& c) { - static constexpr u64 mask = value_t::esize - 1; - return a << (b & mask) | a >> (-b & mask); + return llvm_fshl{std::forward(a), std::forward(b), std::forward(c)}; + } + + template ::is_ok>> + static auto fshr(T&& a, U&& b, V&& c) + { + return llvm_fshr{std::forward(a), std::forward(b), std::forward(c)}; + } + + template ::is_ok>> + static auto rol(T&& a, U&& b) + { + return llvm_rol{std::forward(a), std::forward(b)}; } // Add with saturation diff --git a/rpcs3/Emu/Cell/PPUTranslator.cpp b/rpcs3/Emu/Cell/PPUTranslator.cpp index 
353de8b475..dddb65b1cd 100644 --- a/rpcs3/Emu/Cell/PPUTranslator.cpp +++ b/rpcs3/Emu/Cell/PPUTranslator.cpp @@ -1301,20 +1301,20 @@ void PPUTranslator::VRFIZ(ppu_opcode_t op) void PPUTranslator::VRLB(ppu_opcode_t op) { - const auto ab = GetVrs(VrType::vi8, op.va, op.vb); - SetVr(op.vd, RotateLeft(ab[0], ab[1])); + const auto [a, b] = get_vrs(op.va, op.vb); + set_vr(op.vd, rol(a, b)); } void PPUTranslator::VRLH(ppu_opcode_t op) { - const auto ab = GetVrs(VrType::vi16, op.va, op.vb); - SetVr(op.vd, RotateLeft(ab[0], ab[1])); + const auto [a, b] = get_vrs(op.va, op.vb); + set_vr(op.vd, rol(a, b)); } void PPUTranslator::VRLW(ppu_opcode_t op) { - const auto ab = GetVrs(VrType::vi32, op.va, op.vb); - SetVr(op.vd, RotateLeft(ab[0], ab[1])); + const auto [a, b] = get_vrs(op.va, op.vb); + set_vr(op.vd, rol(a, b)); } void PPUTranslator::VRSQRTEFP(ppu_opcode_t op) diff --git a/rpcs3/Emu/Cell/SPURecompiler.cpp b/rpcs3/Emu/Cell/SPURecompiler.cpp index f3cb92ecd0..860025a872 100644 --- a/rpcs3/Emu/Cell/SPURecompiler.cpp +++ b/rpcs3/Emu/Cell/SPURecompiler.cpp @@ -5141,23 +5141,23 @@ public: void ROTQBI(spu_opcode_t op) { - const auto a = get_vr(op.ra); - const auto b = eval((get_vr(op.rb) >> 32) & 0x7); - set_vr(op.rt, a << zshuffle(b, 1, 1) | zshuffle(a, 1, 0) >> 56 >> zshuffle(8 - b, 1, 1)); + const auto a = get_vr(op.ra); + const auto b = zshuffle(get_vr(op.rb) & 0x7, 3, 3, 3, 3); + set_vr(op.rt, fshl(a, zshuffle(a, 3, 0, 1, 2), b)); } void ROTQMBI(spu_opcode_t op) { - const auto a = get_vr(op.ra); - const auto b = eval(-(get_vr(op.rb) >> 32) & 0x7); - set_vr(op.rt, a >> zshuffle(b, 1, 1) | zshuffle(a, 1, 2) << 56 << zshuffle(8 - b, 1, 1)); + const auto a = get_vr(op.ra); + const auto b = zshuffle(-get_vr(op.rb) & 0x7, 3, 3, 3, 3); + set_vr(op.rt, fshr(zshuffle(a, 1, 2, 3, 4), a, b)); } void SHLQBI(spu_opcode_t op) { - const auto a = get_vr(op.ra); - const auto b = eval((get_vr(op.rb) >> 32) & 0x7); - set_vr(op.rt, a << zshuffle(b, 1, 1) | zshuffle(a, 2, 0) >> 56 >> zshuffle(8 
- b, 1, 1)); + const auto a = get_vr(op.ra); + const auto b = zshuffle(get_vr(op.rb) & 0x7, 3, 3, 3, 3); + set_vr(op.rt, fshl(a, zshuffle(a, 4, 0, 1, 2), b)); } void ROTQBY(spu_opcode_t op) @@ -5233,23 +5233,23 @@ public: void ROTQBII(spu_opcode_t op) { - const auto a = get_vr(op.ra); - const auto b = eval(get_imm(op.i7, false) & 0x7); - set_vr(op.rt, a << b | zshuffle(a, 1, 0) >> 56 >> (8 - b)); + const auto a = get_vr(op.ra); + const auto b = eval(get_imm(op.i7, false) & 0x7); + set_vr(op.rt, fshl(a, zshuffle(a, 3, 0, 1, 2), b)); } void ROTQMBII(spu_opcode_t op) { - const auto a = get_vr(op.ra); - const auto b = eval(-get_imm(op.i7, false) & 0x7); - set_vr(op.rt, a >> b | zshuffle(a, 1, 2) << 56 << (8 - b)); + const auto a = get_vr(op.ra); + const auto b = eval(-get_imm(op.i7, false) & 0x7); + set_vr(op.rt, fshr(zshuffle(a, 1, 2, 3, 4), a, b)); } void SHLQBII(spu_opcode_t op) { - const auto a = get_vr(op.ra); - const auto b = eval(get_imm(op.i7, false) & 0x7); - set_vr(op.rt, a << b | zshuffle(a, 2, 0) >> 56 >> (8 - b)); + const auto a = get_vr(op.ra); + const auto b = eval(get_imm(op.i7, false) & 0x7); + set_vr(op.rt, fshl(a, zshuffle(a, 4, 0, 1, 2), b)); } void ROTQBYI(spu_opcode_t op)