From 665d74740a818e4fd6e4ca835d6fe4fbd0508d5b Mon Sep 17 00:00:00 2001 From: DH Date: Fri, 14 Jul 2023 04:33:45 +0300 Subject: [PATCH] [amdgpu] Implement V_FMA_F32, IMAGE_SAMPLE_LZ, V_CVT_OFF_F32_I4 Loops fix Decompile spirv on error Wait for rpcsx-os if memory not exists --- hw/amdgpu/device/CMakeLists.txt | 3 + hw/amdgpu/device/src/device.cpp | 110 ++++++--- .../libspirv/include/spirv/spirv-builder.hpp | 45 ++++ .../amdgpu/shader/ConverterContext.hpp | 3 - .../shader/include/amdgpu/shader/Fragment.hpp | 8 +- .../shader/include/amdgpu/shader/Function.hpp | 10 +- hw/amdgpu/shader/src/Converter.cpp | 117 +++++++-- hw/amdgpu/shader/src/Fragment.cpp | 227 ++++++++++++++++-- hw/amdgpu/shader/src/Function.cpp | 3 +- rpcsx-gpu/main.cpp | 9 + 10 files changed, 458 insertions(+), 77 deletions(-) diff --git a/hw/amdgpu/device/CMakeLists.txt b/hw/amdgpu/device/CMakeLists.txt index 83db308a9..a4c1bebb6 100644 --- a/hw/amdgpu/device/CMakeLists.txt +++ b/hw/amdgpu/device/CMakeLists.txt @@ -44,6 +44,8 @@ add_precompiled_vulkan_spirv(${PROJECT_NAME}-shaders find_package(SPIRV-Tools REQUIRED CONFIG) find_package(SPIRV-Tools-opt REQUIRED CONFIG) +find_package(spirv_cross_core REQUIRED CONFIG) +find_package(spirv_cross_glsl REQUIRED CONFIG) add_library(${PROJECT_NAME} STATIC ${INCLUDE} ${SRC}) target_link_libraries(${PROJECT_NAME} @@ -55,6 +57,7 @@ PUBLIC util SPIRV-Tools SPIRV-Tools-opt + spirv-cross-glsl PRIVATE ${PROJECT_NAME}-shaders diff --git a/hw/amdgpu/device/src/device.cpp b/hw/amdgpu/device/src/device.cpp index dbad8888d..2c9590c27 100644 --- a/hw/amdgpu/device/src/device.cpp +++ b/hw/amdgpu/device/src/device.cpp @@ -1,7 +1,6 @@ #include "device.hpp" #include "tiler.hpp" -// #include "spirv-tools/libspirv.hpp" #include "spirv-tools/optimizer.hpp" #include "util/unreachable.hpp" #include @@ -18,10 +17,10 @@ #include #include #include -#include -// #include +#include #include #include +#include #include using namespace amdgpu; @@ -1679,6 +1678,34 @@ static VkFormat surfaceFormatToVkFormat(SurfaceFormat surface, } break; + case kSurfaceFormat8_8: + switch (channel) { + case kTextureChannelTypeUNorm: + return VK_FORMAT_R8G8_UNORM; + case kTextureChannelTypeSNorm: + return VK_FORMAT_R8G8_SNORM; + case kTextureChannelTypeUInt: + return VK_FORMAT_R8G8_UINT; + case kTextureChannelTypeSInt: + return VK_FORMAT_R8G8_SINT; + default: + break; + } + break; + + case kSurfaceFormat16_16: + switch (channel) { + case kTextureChannelTypeUInt: + return VK_FORMAT_R16G16_UINT; + case kTextureChannelTypeSInt: + return VK_FORMAT_R16G16_SINT; + case kTextureChannelTypeFloat: + return VK_FORMAT_R16G16_SFLOAT; + default: + break; + } + break; + case kSurfaceFormat32_32: switch (channel) { case kTextureChannelTypeUInt: @@ -1978,36 +2005,35 @@ static bool validateSpirv(const std::vector &bin) { } static void printSpirv(const std::vector &bin) { - // spv_target_env target_env = SPV_ENV_VULKAN_1_3; - // spv_context spvContext = spvContextCreate(target_env); - // spv_diagnostic diagnostic = nullptr; + spv_target_env target_env = SPV_ENV_VULKAN_1_3; + spv_context spvContext = spvContextCreate(target_env); + spv_diagnostic diagnostic = nullptr; - // spv_result_t error = spvBinaryToText( - // spvContext, bin.data(), bin.size(), - // SPV_BINARY_TO_TEXT_OPTION_PRINT | // SPV_BINARY_TO_TEXT_OPTION_COLOR | - // SPV_BINARY_TO_TEXT_OPTION_FRIENDLY_NAMES | - // SPV_BINARY_TO_TEXT_OPTION_COMMENT | - // SPV_BINARY_TO_TEXT_OPTION_INDENT, - // nullptr, &diagnostic); + spv_result_t error = spvBinaryToText( + spvContext, bin.data(), bin.size(), + SPV_BINARY_TO_TEXT_OPTION_PRINT | // SPV_BINARY_TO_TEXT_OPTION_COLOR | + SPV_BINARY_TO_TEXT_OPTION_FRIENDLY_NAMES | + SPV_BINARY_TO_TEXT_OPTION_COMMENT | SPV_BINARY_TO_TEXT_OPTION_INDENT, + nullptr, &diagnostic); - // if (error != 0) { - // spvDiagnosticPrint(diagnostic); - // } + if (error != 0) { + spvDiagnosticPrint(diagnostic); + } - // spvDiagnosticDestroy(diagnostic); - // spvContextDestroy(spvContext); + spvDiagnosticDestroy(diagnostic); + spvContextDestroy(spvContext); - // if (error != 0) { - // return; - // } + if (error != 0) { + return; + } - // spirv_cross::CompilerGLSL glsl(bin); - // spirv_cross::CompilerGLSL::Options options; - // options.version = 460; - // options.es = false; - // options.vulkan_semantics = true; - // glsl.set_common_options(options); - // std::printf("%s\n", glsl.compile().c_str()); + spirv_cross::CompilerGLSL glsl(bin); + spirv_cross::CompilerGLSL::Options options; + options.version = 460; + options.es = false; + options.vulkan_semantics = true; + glsl.set_common_options(options); + std::printf("%s\n", glsl.compile().c_str()); } static std::optional> @@ -3002,7 +3028,7 @@ struct RenderState { tbuffer->tiling_idx); std::fflush(stdout); - assert(tbuffer->width == tbuffer->pitch); + // assert(tbuffer->width == tbuffer->pitch); auto image = Image2D::Allocate( getDeviceLocalMemory(), tbuffer->width + 1, tbuffer->height + 1, @@ -3989,16 +4015,36 @@ void amdgpu::device::handleCommandBuffer(RemoteMemory memory, DrawContext &ctxt, void amdgpu::device::AmdgpuDevice::handleProtectMemory(std::uint64_t address, std::uint64_t size, std::uint32_t prot) { - ::mprotect(memory.getPointer(address), size, prot >> 4); - auto beginPage = address / kPageSize; auto endPage = (address + size + kPageSize - 1) / kPageSize; + ::mprotect(memory.getPointer(address), size, prot >> 4); + if (prot >> 4) { memoryZoneTable.map(beginPage, endPage); - std::printf("Allocated area at %zx, size %zu\n", address, size); + const char *protStr; + switch (prot >> 4) { + case PROT_READ: + protStr = "R"; + break; + + case PROT_WRITE: + protStr = "W"; + break; + + case PROT_WRITE | PROT_READ: + protStr = "W"; + break; + + default: + protStr = "unknown"; + break; + } + std::printf("Allocated area at %zx, size %lx, prot %s\n", address, size, + protStr); } else { memoryZoneTable.unmap(beginPage, endPage); + std::printf("Unmapped area at %zx, size %lx\n", address, size); } } void amdgpu::device::AmdgpuDevice::handleCommandBuffer(std::uint64_t address, diff --git a/hw/amdgpu/lib/libspirv/include/spirv/spirv-builder.hpp b/hw/amdgpu/lib/libspirv/include/spirv/spirv-builder.hpp index 3f332d83d..75cfaf6b5 100644 --- a/hw/amdgpu/lib/libspirv/include/spirv/spirv-builder.hpp +++ b/hw/amdgpu/lib/libspirv/include/spirv/spirv-builder.hpp @@ -1165,6 +1165,25 @@ public: return id; } + // relational + BoolValue createIsNan(BoolType resultType, Value operand1) { + auto region = bodyRegion.pushOp(spv::Op::OpIsNan, 4); + auto id = newId(); + region.pushIdUse(resultType); + region.pushIdDef(id); + region.pushIdUse(operand1); + return id; + } + + BoolValue createIsInf(BoolType resultType, Value operand1) { + auto region = bodyRegion.pushOp(spv::Op::OpIsInf, 4); + auto id = newId(); + region.pushIdUse(resultType); + region.pushIdDef(id); + region.pushIdUse(operand1); + return id; + } + // logic BoolValue createLogicalEqual(BoolType resultType, Value operand1, Value operand2) { @@ -1489,6 +1508,32 @@ public: return id; } + VectorOfValue createImageSampleExplicitLod( + VectorOfType resultType, SampledImageValue sampledImage, + ScalarOrVectorOfValue coords, + spv::ImageOperandsMask operands = spv::ImageOperandsMask::MaskNone, + std::span args = {}) { + auto region = bodyRegion.pushOp( + spv::Op::OpImageSampleExplicitLod, + 5 + (operands == spv::ImageOperandsMask::MaskNone ? 0 + : 1 + args.size())); + auto id = newId>(); + region.pushIdUse(resultType); + region.pushIdDef(id); + region.pushIdUse(sampledImage); + region.pushIdUse(coords); + + if (operands != spv::ImageOperandsMask::MaskNone) { + region.pushWord(static_cast(operands)); + + for (auto arg : args) { + region.pushIdUse(arg); + } + } + + return id; + } + Value createImageQuerySizeLod(Type resultType, ImageValue image, Value lod) { auto region = bodyRegion.pushOp(spv::Op::OpImageQuerySizeLod, 5); auto id = newId(); diff --git a/hw/amdgpu/shader/include/amdgpu/shader/ConverterContext.hpp b/hw/amdgpu/shader/include/amdgpu/shader/ConverterContext.hpp index 6fd5fa941..f607d12f2 100644 --- a/hw/amdgpu/shader/include/amdgpu/shader/ConverterContext.hpp +++ b/hw/amdgpu/shader/include/amdgpu/shader/ConverterContext.hpp @@ -2,12 +2,9 @@ #include "Fragment.hpp" #include "Function.hpp" -#include "RegisterId.hpp" #include "Stage.hpp" #include "TypeId.hpp" #include "Uniform.hpp" -#include "Value.hpp" -#include "scf.hpp" #include #include diff --git a/hw/amdgpu/shader/include/amdgpu/shader/Fragment.hpp b/hw/amdgpu/shader/include/amdgpu/shader/Fragment.hpp index 03574657a..c3377437d 100644 --- a/hw/amdgpu/shader/include/amdgpu/shader/Fragment.hpp +++ b/hw/amdgpu/shader/include/amdgpu/shader/Fragment.hpp @@ -1,16 +1,11 @@ #pragma once - -#include "FragmentTerminator.hpp" -#include "Instruction.hpp" #include "RegisterId.hpp" #include "RegisterState.hpp" -#include "Stage.hpp" #include "TypeId.hpp" -#include "Uniform.hpp" -#include "scf.hpp" #include #include +#include #include namespace amdgpu::shader { @@ -32,6 +27,7 @@ struct Fragment { std::vector predecessors; std::uint64_t jumpAddress = 0; spirv::BoolValue branchCondition; + bool hasTerminator = false; void appendBranch(Fragment &other) { other.predecessors.push_back(this); } diff --git a/hw/amdgpu/shader/include/amdgpu/shader/Function.hpp b/hw/amdgpu/shader/include/amdgpu/shader/Function.hpp index 3c11e4596..80d82566a 100644 --- a/hw/amdgpu/shader/include/amdgpu/shader/Function.hpp +++ b/hw/amdgpu/shader/include/amdgpu/shader/Function.hpp @@ -1,6 +1,7 @@ #pragma once #include "Fragment.hpp" #include "RegisterId.hpp" +#include "Stage.hpp" #include "spirv/spirv-builder.hpp" #include @@ -24,7 +25,14 @@ struct Function { spirv::Type getResultType(); spirv::FunctionType getFunctionType(); - Fragment *createFragment(); + Fragment *createFragment() { + auto result = createDetachedFragment(); + appendFragment(result); + return result; + } + + Fragment *createDetachedFragment(); + void appendFragment(Fragment *fragment) { fragments.push_back(fragment); } void insertReturn(); }; diff --git a/hw/amdgpu/shader/src/Converter.cpp b/hw/amdgpu/shader/src/Converter.cpp index ebfb8fd97..4581d7f24 100644 --- a/hw/amdgpu/shader/src/Converter.cpp +++ b/hw/amdgpu/shader/src/Converter.cpp @@ -49,7 +49,7 @@ public: function = fn; memory = mem; - auto lastFragment = convertBlock(block, &function->entryFragment); + auto lastFragment = convertBlock(block, &function->entryFragment, nullptr); if (lastFragment != nullptr) { lastFragment->builder.createBranch(fn->exitFragment.entryBlockId); @@ -126,7 +126,8 @@ private: return builder.createLogicalOr(boolT, loIsNotZero, hiIsNotZero); } - Fragment *convertBlock(scf::Block *block, Fragment *rootFragment) { + Fragment *convertBlock(scf::Block *block, Fragment *rootFragment, + Fragment *loopMergeFragment) { Fragment *currentFragment = nullptr; for (scf::Node *node = block->getRootNode(); node != nullptr; @@ -178,6 +179,33 @@ private: } if (auto ifElse = dynCast(node)) { + auto isBreakBlock = [](scf::Block *block) { + if (block->isEmpty()) { + return false; + } + if (block->getLastNode() != block->getRootNode()) { + return false; + } + + return dynamic_cast(block->getRootNode()) != nullptr; + }; + + if (loopMergeFragment != nullptr && ifElse->ifTrue->isEmpty() && + isBreakBlock(ifElse->ifFalse)) { + auto mergeFragment = function->createFragment(); + currentFragment->appendBranch(*mergeFragment); + currentFragment->appendBranch(*loopMergeFragment); + + currentFragment->builder.createBranchConditional( + currentFragment->branchCondition, mergeFragment->entryBlockId, + loopMergeFragment->entryBlockId); + + initState(mergeFragment); + releaseStateOf(currentFragment); + currentFragment = mergeFragment; + continue; + } + auto ifTrueFragment = function->createFragment(); auto ifFalseFragment = function->createFragment(); auto mergeFragment = function->createFragment(); @@ -185,18 +213,16 @@ private: currentFragment->appendBranch(*ifTrueFragment); currentFragment->appendBranch(*ifFalseFragment); - currentFragment->builder.createSelectionMerge( - mergeFragment->entryBlockId, {}); - currentFragment->builder.createBranchConditional( - currentFragment->branchCondition, ifTrueFragment->entryBlockId, - ifFalseFragment->entryBlockId); - - auto ifTrueLastBlock = convertBlock(ifElse->ifTrue, ifTrueFragment); - auto ifFalseLastBlock = convertBlock(ifElse->ifFalse, ifFalseFragment); + auto ifTrueLastBlock = + convertBlock(ifElse->ifTrue, ifTrueFragment, loopMergeFragment); + auto ifFalseLastBlock = + convertBlock(ifElse->ifFalse, ifFalseFragment, loopMergeFragment); if (ifTrueLastBlock != nullptr) { - ifTrueLastBlock->builder.createBranch(mergeFragment->entryBlockId); - ifTrueLastBlock->appendBranch(*mergeFragment); + if (!ifTrueLastBlock->hasTerminator) { + ifTrueLastBlock->builder.createBranch(mergeFragment->entryBlockId); + ifTrueLastBlock->appendBranch(*mergeFragment); + } if (ifTrueLastBlock->registers == nullptr) { initState(ifTrueLastBlock); @@ -204,14 +230,23 @@ private: } if (ifFalseLastBlock != nullptr) { - ifFalseLastBlock->builder.createBranch(mergeFragment->entryBlockId); - ifFalseLastBlock->appendBranch(*mergeFragment); + if (!ifFalseLastBlock->hasTerminator) { + ifFalseLastBlock->builder.createBranch(mergeFragment->entryBlockId); + ifFalseLastBlock->appendBranch(*mergeFragment); + } if (ifFalseLastBlock->registers == nullptr) { initState(ifFalseLastBlock); } } + currentFragment->builder.createSelectionMerge( + mergeFragment->entryBlockId, {}); + + currentFragment->builder.createBranchConditional( + currentFragment->branchCondition, ifTrueFragment->entryBlockId, + ifFalseFragment->entryBlockId); + releaseStateOf(currentFragment); initState(mergeFragment); @@ -226,6 +261,56 @@ private: continue; } + if (auto loop = dynCast(node)) { + auto headerFragment = function->createFragment(); + auto bodyFragment = function->createFragment(); + auto mergeFragment = function->createDetachedFragment(); + auto continueFragment = function->createDetachedFragment(); + + currentFragment->builder.createBranch(headerFragment->entryBlockId); + currentFragment->appendBranch(*headerFragment); + + initState(headerFragment); + releaseStateOf(currentFragment); + + headerFragment->builder.createLoopMerge( + mergeFragment->entryBlockId, continueFragment->entryBlockId, + spv::LoopControlMask::MaskNone, {}); + + headerFragment->builder.createBranch(bodyFragment->entryBlockId); + headerFragment->appendBranch(*bodyFragment); + + auto bodyLastBlock = + convertBlock(loop->body, bodyFragment, mergeFragment); + + if (bodyLastBlock != nullptr) { + if (bodyLastBlock->registers == nullptr) { + initState(bodyLastBlock); + } + + bodyLastBlock->builder.createBranch(continueFragment->entryBlockId); + bodyLastBlock->appendBranch(*continueFragment); + } + + continueFragment->builder.createBranch(headerFragment->entryBlockId); + continueFragment->appendBranch(*headerFragment); + initState(continueFragment); + + releaseStateOf(headerFragment); + initState(mergeFragment); + + if (bodyLastBlock != nullptr) { + releaseStateOf(bodyLastBlock); + } + + function->appendFragment(continueFragment); + function->appendFragment(mergeFragment); + releaseStateOf(continueFragment); + + currentFragment = mergeFragment; + continue; + } + if (dynCast(node)) { auto jumpAddress = currentFragment->jumpAddress; @@ -250,7 +335,7 @@ private: auto targetFragment = function->createFragment(); currentFragment->builder.createBranch(targetFragment->entryBlockId); currentFragment->appendBranch(*targetFragment); - auto result = convertBlock(scfBlock, targetFragment); + auto result = convertBlock(scfBlock, targetFragment, nullptr); if (currentFragment->registers == nullptr) { initState(targetFragment); @@ -264,9 +349,11 @@ private: currentFragment->appendBranch(function->exitFragment); currentFragment->builder.createBranch( function->exitFragment.entryBlockId); + currentFragment->hasTerminator = true; return nullptr; } + node->dump(); util::unreachable(); } diff --git a/hw/amdgpu/shader/src/Fragment.cpp b/hw/amdgpu/shader/src/Fragment.cpp index cbd5ff8cc..4c53267a5 100644 --- a/hw/amdgpu/shader/src/Fragment.cpp +++ b/hw/amdgpu/shader/src/Fragment.cpp @@ -1,10 +1,12 @@ #include "Fragment.hpp" #include "ConverterContext.hpp" +#include "Instruction.hpp" #include "RegisterId.hpp" #include "RegisterState.hpp" #include #include +#include #include #include @@ -553,7 +555,8 @@ enum class CmpKind { NLT, NE, TRU, - T = TRU + T = TRU, + CLASS }; enum class CmpFlags { None = 0, X = 1 << 0, S = 1 << 1, SX = S | X }; @@ -562,7 +565,8 @@ inline CmpFlags operator&(CmpFlags a, CmpFlags b) { } Value doCmpOp(Fragment &fragment, TypeId type, spirv::Value src0, - spirv::Value src1, CmpKind kind, CmpFlags flags) { + spirv::Value src1, CmpKind kind, CmpFlags flags, + std::uint8_t typeMask = 0) { spirv::BoolValue cmp; auto boolT = fragment.context->getBoolType(); @@ -652,6 +656,89 @@ Value doCmpOp(Fragment &fragment, TypeId type, spirv::Value src0, case CmpKind::TRU: cmp = fragment.context->getTrue(); break; + + case CmpKind::CLASS: { + enum class FloatClass { + SNan = 0, + QNan = 1, + NInf = 2, + NNorm = 3, + NDenom = 4, + NZero = 5, + PZero = 6, + PDenom = 7, + PNorm = 8, + PInf = 9, + }; + + auto testCmpClass = [&](FloatClass fclass, + spirv::FloatValue val) -> spirv::BoolValue { + switch (fclass) { + case FloatClass::SNan: + case FloatClass::QNan: + return fragment.builder.createIsNan(boolT, val); + + case FloatClass::NInf: + return fragment.builder.createLogicalAnd( + boolT, + fragment.builder.createFOrdLessThan( + boolT, val, fragment.context->getFloat32(0)), + fragment.builder.createIsInf(boolT, val)); + + case FloatClass::NZero: + case FloatClass::PZero: + return fragment.builder.createFOrdEqual( + boolT, val, fragment.context->getFloat32(0)); + + case FloatClass::NNorm: + case FloatClass::NDenom: + case FloatClass::PDenom: + case FloatClass::PNorm: + util::unreachable(); + + case FloatClass::PInf: + return fragment.builder.createLogicalAnd( + boolT, + fragment.builder.createFOrdGreaterThan( + boolT, val, fragment.context->getFloat32(0)), + fragment.builder.createIsInf(boolT, val)); + } + + util::unreachable(); + }; + + // we cannot differ signaling and quiet nan + if (typeMask & 3) { + typeMask = (typeMask & ~3) | 2; + } + + // we cannot differ positive and negative zero + if (typeMask & 0x60) { + typeMask = (typeMask & ~0x60) | 0x40; + } + + for (int i = 0; i < 10; ++i) { + if (typeMask & (1 << i)) { + auto lhs = + testCmpClass((FloatClass)i, spirv::cast(src0)); + auto rhs = + testCmpClass((FloatClass)i, spirv::cast(src1)); + + auto bitResult = fragment.builder.createLogicalAnd(boolT, lhs, rhs); + + if (cmp) { + cmp = fragment.builder.createLogicalOr(boolT, cmp, bitResult); + } else { + cmp = bitResult; + } + } + } + + if (!cmp) { + cmp = fragment.context->getFalse(); + } + break; + } } if (!cmp) { @@ -1563,7 +1650,20 @@ void convertVop3(Fragment &fragment, Vop3 inst) { auto src0 = fragment.getScalarOperand(inst.src0, type).value; auto src1 = fragment.getScalarOperand(inst.src1, type).value; - auto result = doCmpOp(fragment, type, src0, src1, kind, flags); + std::int8_t typeMask = 0; + if (kind == CmpKind::CLASS) { + auto value = fragment.context->findSint32Value( + fragment.getScalarOperand(inst.src2, type).value); + + if (!value) { + // util::unreachable(); + typeMask = 2; + } else { + typeMask = *value; + } + } + + auto result = doCmpOp(fragment, type, src0, src1, kind, flags, typeMask); fragment.setScalarOperand(inst.vdst, result); fragment.setScalarOperand(inst.vdst + 1, {fragment.context->getUInt32Type(), fragment.context->getUInt32(0)}); @@ -1978,8 +2078,9 @@ void convertVop3(Fragment &fragment, Vop3 inst) { case Vop3::Op::V3_CMP_T_I32: cmpOp(TypeId::SInt32, CmpKind::T); break; - // case Vop3::Op::V3_CMP_CLASS_F32: cmpOp(TypeId::Float32, CmpKind::CLASS); - // break; + case Vop3::Op::V3_CMP_CLASS_F32: + cmpOp(TypeId::Float32, CmpKind::CLASS); + break; case Vop3::Op::V3_CMP_LT_I16: cmpOp(TypeId::SInt16, CmpKind::LT); break; @@ -1998,8 +2099,9 @@ void convertVop3(Fragment &fragment, Vop3 inst) { case Vop3::Op::V3_CMP_GE_I16: cmpOp(TypeId::SInt16, CmpKind::GE); break; - // case Vop3::Op::V3_CMP_CLASS_F16: cmpOp(TypeId::Float16, CmpKind::CLASS); - // break; + case Vop3::Op::V3_CMP_CLASS_F16: + cmpOp(TypeId::Float16, CmpKind::CLASS); + break; case Vop3::Op::V3_CMPX_F_I32: cmpOp(TypeId::SInt32, CmpKind::F, CmpFlags::X); break; @@ -2024,8 +2126,9 @@ void convertVop3(Fragment &fragment, Vop3 inst) { case Vop3::Op::V3_CMPX_T_I32: cmpOp(TypeId::SInt32, CmpKind::T, CmpFlags::X); break; - // case Vop3::Op::V3_CMPX_CLASS_F32: cmpOp(TypeId::Float32, CmpKind::CLASS, - // CmpFlags::X); break; + case Vop3::Op::V3_CMPX_CLASS_F32: + cmpOp(TypeId::Float32, CmpKind::CLASS, CmpFlags::X); + break; case Vop3::Op::V3_CMPX_LT_I16: cmpOp(TypeId::SInt16, CmpKind::LT, CmpFlags::X); break; @@ -2044,8 +2147,9 @@ void convertVop3(Fragment &fragment, Vop3 inst) { case Vop3::Op::V3_CMPX_GE_I16: cmpOp(TypeId::SInt16, CmpKind::GE, CmpFlags::X); break; - // case Vop3::Op::V3_CMPX_CLASS_F16: cmpOp(TypeId::Float16, CmpKind::CLASS, - // CmpFlags::X); break; + case Vop3::Op::V3_CMPX_CLASS_F16: + cmpOp(TypeId::Float16, CmpKind::CLASS, CmpFlags::X); + break; case Vop3::Op::V3_CMP_F_I64: cmpOp(TypeId::SInt64, CmpKind::F); break; @@ -2070,8 +2174,9 @@ void convertVop3(Fragment &fragment, Vop3 inst) { case Vop3::Op::V3_CMP_T_I64: cmpOp(TypeId::SInt64, CmpKind::T); break; - // case Vop3::Op::V3_CMP_CLASS_F64: cmpOp(TypeId::Float64, CmpKind::CLASS); - // break; + case Vop3::Op::V3_CMP_CLASS_F64: + cmpOp(TypeId::Float64, CmpKind::CLASS); + break; case Vop3::Op::V3_CMP_LT_U16: cmpOp(TypeId::UInt16, CmpKind::LT); break; @@ -2114,8 +2219,9 @@ void convertVop3(Fragment &fragment, Vop3 inst) { case Vop3::Op::V3_CMPX_T_I64: cmpOp(TypeId::SInt64, CmpKind::T, CmpFlags::X); break; - // case Vop3::Op::V3_CMPX_CLASS_F64: cmpOp(TypeId::Float64, CmpKind::CLASS, - // CmpFlags::X); break; + case Vop3::Op::V3_CMPX_CLASS_F64: + cmpOp(TypeId::Float64, CmpKind::CLASS, CmpFlags::X); + break; case Vop3::Op::V3_CMPX_LT_U16: cmpOp(TypeId::UInt16, CmpKind::LT, CmpFlags::X); break; @@ -2515,8 +2621,6 @@ void convertVop3(Fragment &fragment, Vop3 inst) { fragment.getScalarOperand(inst.src0, TypeId::SInt32).value); auto src1 = spirv::cast( fragment.getScalarOperand(inst.src1, TypeId::SInt32).value); - auto src2 = spirv::cast( - fragment.getScalarOperand(inst.src2, TypeId::SInt32).value); auto operandT = fragment.context->getSint32Type(); src0 = fragment.builder.createShiftLeftLogical( @@ -2548,6 +2652,42 @@ void convertVop3(Fragment &fragment, Vop3 inst) { fragment.setVectorOperand(inst.vdst, {floatT, result}); break; } + case Vop3::Op::V3_MAX3_F32: { + auto src0 = spirv::cast( + fragment.getScalarOperand(inst.src0, TypeId::Float32).value); + auto src1 = spirv::cast( + fragment.getScalarOperand(inst.src1, TypeId::Float32).value); + auto src2 = spirv::cast( + fragment.getScalarOperand(inst.src2, TypeId::Float32).value); + auto floatT = fragment.context->getFloat32Type(); + auto boolT = fragment.context->getBoolType(); + + auto max01 = fragment.builder.createSelect( + floatT, fragment.builder.createFOrdGreaterThanEqual(boolT, src0, src1), + src0, src1); + auto result = fragment.builder.createSelect( + floatT, fragment.builder.createFOrdGreaterThanEqual(boolT, max01, src2), + max01, src2); + + fragment.setVectorOperand(inst.vdst, {floatT, result}); + break; + } + case Vop3::Op::V3_FMA_F32: { + auto src0 = spirv::cast( + fragment.getScalarOperand(inst.src0, TypeId::Float32).value); + auto src1 = spirv::cast( + fragment.getScalarOperand(inst.src1, TypeId::Float32).value); + auto src2 = spirv::cast( + fragment.getScalarOperand(inst.src2, TypeId::Float32).value); + auto floatT = fragment.context->getFloat32Type(); + auto glslStd450 = fragment.context->getGlslStd450(); + + auto result = fragment.builder.createExtInst( + floatT, glslStd450, GLSLstd450Fma, {{src0, src1, src2}}); + + fragment.setVectorOperand(inst.vdst, {floatT, result}); + break; + } case Vop3::Op::V3_CNDMASK_B32: { auto src0 = fragment.getScalarOperand(inst.src0, TypeId::UInt32).value; auto src1 = fragment.getScalarOperand(inst.src1, TypeId::UInt32).value; @@ -3321,6 +3461,40 @@ void convertMimg(Fragment &fragment, Mimg inst) { } break; } + + case Mimg::Op::IMAGE_SAMPLE_LZ: { + auto image = + fragment.createImage(RegisterId::Raw(inst.srsrc << 2), inst.r128); + auto sampler = fragment.createSampler(RegisterId::Raw(inst.ssamp << 2)); + auto coord0 = fragment.getVectorOperand(inst.vaddr, TypeId::Float32).value; + auto coord1 = + fragment.getVectorOperand(inst.vaddr + 1, TypeId::Float32).value; + auto coord2 = + fragment.getVectorOperand(inst.vaddr + 2, TypeId::Float32).value; + auto coords = fragment.builder.createCompositeConstruct( + fragment.context->getFloat32x3Type(), + {{coord0, coord1, coord2}}); // TODO + + auto sampledImage2dT = fragment.context->getSampledImage2DType(); + auto float4T = fragment.context->getFloat32x4Type(); + auto floatT = fragment.context->getFloat32Type(); + auto sampledImage = + fragment.builder.createSampledImage(sampledImage2dT, image, sampler); + auto value = fragment.builder.createImageSampleExplicitLod( + float4T, sampledImage, coords, spv::ImageOperandsMask::Lod, + {{fragment.context->getFloat32(0)}}); + + for (std::uint32_t dstOffset = 0, i = 0; i < 4; ++i) { + if (inst.dmask & (1 << i)) { + fragment.setVectorOperand( + inst.vdata + dstOffset++, + {floatT, + fragment.builder.createCompositeExtract(floatT, value, {{i}})}); + } + } + break; + } + case Mimg::Op::IMAGE_SAMPLE: { auto image = fragment.createImage(RegisterId::Raw(inst.srsrc << 2), inst.r128); @@ -3499,11 +3673,28 @@ void convertVop1(Fragment &fragment, Vop1 inst) { break; } + case Vop1::Op::V_CVT_OFF_F32_I4: { + auto src = spirv::cast( + fragment.getScalarOperand(inst.src0, TypeId::SInt32).value); + auto floatT = fragment.context->getFloat32Type(); + auto int32T = fragment.context->getSint32Type(); + src = spirv::cast(fragment.builder.createBitwiseAnd( + int32T, src, fragment.context->getSInt32(0b1111))); + src = fragment.builder.createISub(int32T, src, + fragment.context->getSInt32(8)); + + auto fsrc = fragment.builder.createConvertSToF(floatT, src); + auto result = fragment.builder.createFDiv(floatT, fsrc, + fragment.context->getFloat32(16)); + + fragment.setVectorOperand(inst.vdst, {floatT, result}); + break; + } + case Vop1::Op::V_RSQ_F32: { auto src = spirv::cast( fragment.getScalarOperand(inst.src0, TypeId::Float32).value); auto floatT = fragment.context->getFloat32Type(); - auto float1 = fragment.context->getFloat32(1); auto glslStd450 = fragment.context->getGlslStd450(); auto result = fragment.builder.createExtInst( diff --git a/hw/amdgpu/shader/src/Function.cpp b/hw/amdgpu/shader/src/Function.cpp index a87618240..096be6147 100644 --- a/hw/amdgpu/shader/src/Function.cpp +++ b/hw/amdgpu/shader/src/Function.cpp @@ -225,10 +225,9 @@ spirv::FunctionType Function::getFunctionType() { return context->getFunctionType(getResultType(), params); } -Fragment *Function::createFragment() { +Fragment *Function::createDetachedFragment() { auto result = context->createFragment(0); result->function = this; - fragments.push_back(result); return result; } diff --git a/rpcsx-gpu/main.cpp b/rpcsx-gpu/main.cpp index e90ed5d3f..cbc69d4b1 100644 --- a/rpcsx-gpu/main.cpp +++ b/rpcsx-gpu/main.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -689,10 +690,18 @@ int main(int argc, const char *argv[]) { amdgpu::bridge::BridgePuller bridgePuller{bridge}; amdgpu::bridge::Command commandsBuffer[32]; + if (!std::filesystem::exists(std::string("/dev/shm") + shmName)) { + std::printf("Waiting for OS\n"); + while (!std::filesystem::exists(std::string("/dev/shm") + shmName)) { + std::this_thread::sleep_for(std::chrono::milliseconds(300)); + } + } + int memoryFd = ::shm_open(shmName, O_RDWR, S_IRUSR | S_IWUSR); if (memoryFd < 0) { std::printf("failed to open shared memory\n"); + return 1; } struct stat memoryStat;