diff --git a/hw/amdgpu/device/include/amdgpu/device/device.hpp b/hw/amdgpu/device/include/amdgpu/device/device.hpp index f86e84078..a180fd554 100644 --- a/hw/amdgpu/device/include/amdgpu/device/device.hpp +++ b/hw/amdgpu/device/include/amdgpu/device/device.hpp @@ -1469,7 +1469,8 @@ struct AmdgpuDevice { void handleProtectMemory(std::uint64_t address, std::uint64_t size, std::uint32_t prot); - void handleCommandBuffer(std::uint64_t address, std::uint64_t size); + void handleCommandBuffer(std::uint64_t queueId, std::uint64_t address, + std::uint64_t size); bool handleFlip(std::uint32_t bufferIndex, std::uint64_t arg, VkCommandBuffer cmd, VkImage targetImage, VkExtent2D targetExtent, std::vector &usedBuffers, diff --git a/hw/amdgpu/device/src/device.cpp b/hw/amdgpu/device/src/device.cpp index 23e5faace..20eb2055b 100644 --- a/hw/amdgpu/device/src/device.cpp +++ b/hw/amdgpu/device/src/device.cpp @@ -408,19 +408,6 @@ static constexpr auto CB_BLEND0_CONTROL_SEPARATE_ALPHA_BLEND_MASK = static constexpr auto CB_BLEND0_CONTROL_BLEND_ENABLE_MASK = genMask(getMaskEnd(CB_BLEND0_CONTROL_SEPARATE_ALPHA_BLEND_MASK), 1); -static std::uint64_t pgmPsAddress = 0; -static std::uint64_t pgmVsAddress = 0; -static std::uint64_t pgmComputeAddress = 0; -static std::uint32_t userVsData[16]; -static std::uint32_t userPsData[16]; -static std::uint32_t userComputeData[16]; -static std::uint32_t computeNumThreadX = 1; -static std::uint32_t computeNumThreadY = 1; -static std::uint32_t computeNumThreadZ = 1; -static std::uint8_t psUserSpgrs; -static std::uint8_t vsUserSpgrs; -static std::uint8_t computeUserSpgrs; - struct ColorBuffer { std::uint64_t base; std::uint8_t format; @@ -548,14 +535,25 @@ struct ColorBuffer { static constexpr std::size_t colorBuffersCount = 6; -static ColorBuffer colorBuffers[colorBuffersCount]; - -static std::uint32_t indexType; - -static std::uint32_t screenScissorX = 0; -static std::uint32_t screenScissorY = 0; -static std::uint32_t screenScissorW = 0; -static std::uint32_t screenScissorH = 0; +enum class CbRasterOp { + Blackness = 0x00, + Nor = 0x05, // ~(src | dst) + AndInverted = 0x0a, // ~src & dst + CopyInverted = 0x0f, // ~src + NotSrcErase = 0x11, // ~src & ~dst + SrcErase = 0x44, // src & ~dst + DstInvert = 0x55, // ~dst + Xor = 0x5a, // src ^ dst + Nand = 0x5f, // ~(src & dst) + And = 0x88, // src & dst + Equiv = 0x99, // ~(src ^ dst) + Noop = 0xaa, // dst + OrInverted = 0xaf, // ~src | dst + Copy = 0xcc, // src + OrReverse = 0xdd, // src | ~dst + Or = 0xEE, // src | dst + Whiteness = 0xff, +}; enum class CbColorFormat { /* @@ -579,400 +577,406 @@ enum class CbColorFormat { Resolve, }; -enum class CbRasterOp { - Blackness = 0x00, - Nor = 0x05, // ~(src | dst) - AndInverted = 0x0a, // ~src & dst - CopyInverted = 0x0f, // ~src - NotSrcErase = 0x11, // ~src & ~dst - SrcErase = 0x44, // src & ~dst - DstInvert = 0x55, // ~dst - Xor = 0x5a, // src ^ dst - Nand = 0x5f, // ~(src & dst) - And = 0x88, // src & dst - Equiv = 0x99, // ~(src ^ dst) - Noop = 0xaa, // dst - OrInverted = 0xaf, // ~src | dst - Copy = 0xcc, // src - OrReverse = 0xdd, // src | ~dst - Or = 0xEE, // src | dst - Whiteness = 0xff, +struct QueueRegisters { + std::uint64_t pgmPsAddress = 0; + std::uint64_t pgmVsAddress = 0; + std::uint64_t pgmComputeAddress = 0; + std::uint32_t userVsData[16]; + std::uint32_t userPsData[16]; + std::uint32_t userComputeData[16]; + std::uint32_t computeNumThreadX = 1; + std::uint32_t computeNumThreadY = 1; + std::uint32_t computeNumThreadZ = 1; + std::uint8_t psUserSpgrs; + std::uint8_t vsUserSpgrs; + std::uint8_t computeUserSpgrs; + + ColorBuffer colorBuffers[colorBuffersCount]; + + std::uint32_t indexType; + + std::uint32_t screenScissorX = 0; + std::uint32_t screenScissorY = 0; + std::uint32_t screenScissorW = 0; + std::uint32_t screenScissorH = 0; + + CbColorFormat cbColorFormat = CbColorFormat::Normal; + + CbRasterOp cbRasterOp = CbRasterOp::Copy; + + std::uint32_t vgtPrimitiveType = 0; + bool stencilEnable = false; + bool depthEnable = false; + bool depthWriteEnable = false; + bool depthBoundsEnable = false; + int zFunc = 0; + bool backFaceEnable = false; + int stencilFunc = 0; + int stencilFuncBackFace = 0; + + float depthClear = 1.f; + + bool cullFront = false; + bool cullBack = false; + int face = 0; // 0 - CCW, 1 - CW + bool polyMode = false; + int polyModeFrontPType = 0; + int polyModeBackPType = 0; + bool polyOffsetFrontEnable = false; + bool polyOffsetBackEnable = false; + bool polyOffsetParaEnable = false; + bool vtxWindowOffsetEnable = false; + bool provokingVtxLast = false; + bool erspCorrDis = false; + bool multiPrimIbEna = false; + + bool depthClearEnable = false; + bool stencilClearEnable = false; + bool depthCopy = false; + bool stencilCopy = false; + bool resummarizeEnable = false; + bool stencilCompressDisable = false; + bool depthCompressDisable = false; + bool copyCentroid = false; + int copySample = 0; + bool zpassIncrementDisable = false; + + std::uint64_t zReadBase = 0; + std::uint64_t zWriteBase = 0; + + BlendMultiplier blendColorSrc = {}; + BlendFunc blendColorFn = {}; + BlendMultiplier blendColorDst = {}; + BlendMultiplier blendAlphaSrc = {}; + BlendFunc blendAlphaFn = {}; + BlendMultiplier blendAlphaDst = {}; + bool blendSeparateAlpha = false; + bool blendEnable = false; + std::uint32_t cbRenderTargetMask = 0; + + void setRegister(std::uint32_t regId, std::uint32_t value) { + switch (regId) { + case SPI_SHADER_PGM_LO_PS: + pgmPsAddress &= ~((1ull << 40) - 1); + pgmPsAddress |= static_cast(value) << 8; + break; + case SPI_SHADER_PGM_HI_PS: + pgmPsAddress &= (1ull << 40) - 1; + pgmPsAddress |= static_cast(value) << 40; + break; + case SPI_SHADER_PGM_LO_VS: + pgmVsAddress &= ~((1ull << 40) - 1); + pgmVsAddress |= static_cast(value) << 8; + break; + case SPI_SHADER_PGM_HI_VS: + pgmVsAddress &= (1ull << 40) - 1; + pgmVsAddress |= static_cast(value) << 40; + break; + + case SPI_SHADER_USER_DATA_VS_0: + case SPI_SHADER_USER_DATA_VS_1: + case SPI_SHADER_USER_DATA_VS_2: + case SPI_SHADER_USER_DATA_VS_3: + case SPI_SHADER_USER_DATA_VS_4: + case SPI_SHADER_USER_DATA_VS_5: + case SPI_SHADER_USER_DATA_VS_6: + case SPI_SHADER_USER_DATA_VS_7: + case SPI_SHADER_USER_DATA_VS_8: + case SPI_SHADER_USER_DATA_VS_9: + case SPI_SHADER_USER_DATA_VS_10: + case SPI_SHADER_USER_DATA_VS_11: + case SPI_SHADER_USER_DATA_VS_12: + case SPI_SHADER_USER_DATA_VS_13: + case SPI_SHADER_USER_DATA_VS_14: + case SPI_SHADER_USER_DATA_VS_15: + userVsData[regId - SPI_SHADER_USER_DATA_VS_0] = value; + break; + + case SPI_SHADER_USER_DATA_PS_0: + case SPI_SHADER_USER_DATA_PS_1: + case SPI_SHADER_USER_DATA_PS_2: + case SPI_SHADER_USER_DATA_PS_3: + case SPI_SHADER_USER_DATA_PS_4: + case SPI_SHADER_USER_DATA_PS_5: + case SPI_SHADER_USER_DATA_PS_6: + case SPI_SHADER_USER_DATA_PS_7: + case SPI_SHADER_USER_DATA_PS_8: + case SPI_SHADER_USER_DATA_PS_9: + case SPI_SHADER_USER_DATA_PS_10: + case SPI_SHADER_USER_DATA_PS_11: + case SPI_SHADER_USER_DATA_PS_12: + case SPI_SHADER_USER_DATA_PS_13: + case SPI_SHADER_USER_DATA_PS_14: + case SPI_SHADER_USER_DATA_PS_15: + userPsData[regId - SPI_SHADER_USER_DATA_PS_0] = value; + break; + + case SPI_SHADER_PGM_RSRC2_PS: + psUserSpgrs = (value >> 1) & 0x1f; + break; + + case SPI_SHADER_PGM_RSRC2_VS: + vsUserSpgrs = (value >> 1) & 0x1f; + break; + + case CB_COLOR0_BASE ... CB_COLOR6_DCC_BASE: { + auto buffer = + (regId - CB_COLOR0_BASE) / (CB_COLOR1_BASE - CB_COLOR0_BASE); + auto index = (regId - CB_COLOR0_BASE) % (CB_COLOR1_BASE - CB_COLOR0_BASE); + colorBuffers[buffer].setRegister(index, value); + break; + } + + case DB_RENDER_CONTROL: + depthClearEnable = getBit(value, 0); + stencilClearEnable = getBit(value, 1); + depthCopy = getBit(value, 2); + stencilCopy = getBit(value, 3); + resummarizeEnable = getBit(value, 4); + stencilCompressDisable = getBit(value, 5); + depthCompressDisable = getBit(value, 6); + copyCentroid = getBit(value, 7); + copySample = getBits(value, 10, 8); + zpassIncrementDisable = getBit(value, 11); + break; + + case DB_Z_READ_BASE: + zReadBase = static_cast(value) << 8; + break; + + case DB_Z_WRITE_BASE: + zWriteBase = static_cast(value) << 8; + break; + + case DB_DEPTH_CLEAR: + depthClear = std::bit_cast(value); + break; + + case DB_DEPTH_CONTROL: + stencilEnable = getBit(value, 0) != 0; + depthEnable = getBit(value, 1) != 0; + depthWriteEnable = getBit(value, 2) != 0; + depthBoundsEnable = getBit(value, 3) != 0; + zFunc = getBits(value, 6, 4); + backFaceEnable = getBit(value, 7); + stencilFunc = getBits(value, 11, 8); + stencilFuncBackFace = getBits(value, 23, 20); + + std::printf("stencilEnable=%u, depthEnable=%u, depthWriteEnable=%u, " + "depthBoundsEnable=%u, zFunc=%u, backFaceEnable=%u, " + "stencilFunc=%u, stencilFuncBackFace=%u\n", + stencilEnable, depthEnable, depthWriteEnable, + depthBoundsEnable, zFunc, backFaceEnable, stencilFunc, + stencilFuncBackFace); + break; + + case CB_TARGET_MASK: { + cbRenderTargetMask = value; + break; + } + + case CB_COLOR_CONTROL: { + /* + If true, then each UNORM format COLOR_8_8_8_8 + MRT is treated as an SRGB format instead. This affects + both normal draw and resolve. This bit exists for + compatibility with older architectures that did not have + an SRGB number type. + */ + auto degammaEnable = getBits(value, 3, 0); + + /* + This field selects standard color processing or one of + several major operation modes. + + POSSIBLE VALUES: + 00 - CB_DISABLE: Disables drawing to color + buffer. Causes DB to not send tiles/quads to CB. CB + itself ignores this field. + 01 - CB_NORMAL: Normal rendering mode. DB + should send tiles and quads for pixel exports or just + quads for compute exports. + 02 - CB_ELIMINATE_FAST_CLEAR: Fill fast + cleared color surface locations with clear color. DB + should send only tiles. + 03 - CB_RESOLVE: Read from MRT0, average all + samples, and write to MRT1, which is one-sample. DB + should send only tiles. + 04 - CB_DECOMPRESS: Decompress MRT0 to a + uncompressed color format. This is required before a + multisampled surface is accessed by the CPU, or used as + a texture. This also decompresses the FMASK buffer. A + CB_ELIMINATE_FAST_CLEAR pass before this is + unnecessary. DB should send tiles and quads. + 05 - CB_FMASK_DECOMPRESS: Decompress the + FMASK buffer into a texture readable format. A + CB_ELIMINATE_FAST_CLEAR pass before this is + unnecessary. DB should send only tiles. + */ + auto mode = getBits(value, 6, 4); + + /* + This field supports the 28 boolean ops that combine + either source and dest or brush and dest, with brush + provided by the shader in place of source. The code + 0xCC (11001100) copies the source to the destination, + which disables the ROP function. ROP must be disabled + if any MRT enables blending. + + POSSIBLE VALUES: + 00 - 0x00: BLACKNESS + 05 - 0x05 + 10 - 0x0A + 15 - 0x0F + 17 - 0x11: NOTSRCERASE + 34 - 0x22 + 51 - 0x33: NOTSRCCOPY + 68 - 0x44: SRCERASE + 80 - 0x50 + 85 - 0x55: DSTINVERT + 90 - 0x5A: PATINVERT + 95 - 0x5F + 102 - 0x66: SRCINVERT + 119 - 0x77 + 136 - 0x88: SRCAND + 153 - 0x99 + 160 - 0xA0 + 165 - 0xA5 + 170 - 0xAA + 175 - 0xAF + 187 - 0xBB: MERGEPAINT + 204 - 0xCC: SRCCOPY + 221 - 0xDD + 238 - 0xEE: SRCPAINT + 240 - 0xF0: PATCOPY + 245 - 0xF5 + 250 - 0xFA + 255 - 0xFF: WHITENESS + */ + auto rop3 = getBits(value, 23, 16); + + std::printf(" * degammaEnable = %x\n", degammaEnable); + std::printf(" * mode = %x\n", mode); + std::printf(" * rop3 = %x\n", rop3); + + cbColorFormat = static_cast(mode); + cbRasterOp = static_cast(rop3); + break; + } + + case PA_CL_CLIP_CNTL: + cullFront = getBit(value, 0); + cullBack = getBit(value, 1); + face = getBit(value, 2); + polyMode = getBits(value, 4, 3); + polyModeFrontPType = getBits(value, 7, 5); + polyModeBackPType = getBits(value, 10, 8); + polyOffsetFrontEnable = getBit(value, 11); + polyOffsetBackEnable = getBit(value, 12); + polyOffsetParaEnable = getBit(value, 13); + vtxWindowOffsetEnable = getBit(value, 16); + provokingVtxLast = getBit(value, 19); + erspCorrDis = getBit(value, 20); + multiPrimIbEna = getBit(value, 21); + break; + + case PA_SC_SCREEN_SCISSOR_TL: + screenScissorX = static_cast(value); + screenScissorY = static_cast(value >> 16); + break; + + case PA_SC_SCREEN_SCISSOR_BR: + screenScissorW = static_cast(value) - screenScissorX; + screenScissorH = static_cast(value >> 16) - screenScissorY; + break; + + case VGT_PRIMITIVE_TYPE: + vgtPrimitiveType = value; + break; + + case COMPUTE_NUM_THREAD_X: + computeNumThreadX = value; + break; + + case COMPUTE_NUM_THREAD_Y: + computeNumThreadY = value; + break; + + case COMPUTE_NUM_THREAD_Z: + computeNumThreadZ = value; + break; + + case COMPUTE_PGM_LO: + pgmComputeAddress &= ~((1ull << 40) - 1); + pgmComputeAddress |= static_cast(value) << 8; + break; + + case COMPUTE_PGM_HI: + pgmComputeAddress &= (1ull << 40) - 1; + pgmComputeAddress |= static_cast(value) << 40; + break; + + case COMPUTE_PGM_RSRC1: + break; + case COMPUTE_PGM_RSRC2: + computeUserSpgrs = (value >> 1) & 0x1f; + break; + + case COMPUTE_USER_DATA_0: + case COMPUTE_USER_DATA_1: + case COMPUTE_USER_DATA_2: + case COMPUTE_USER_DATA_3: + case COMPUTE_USER_DATA_4: + case COMPUTE_USER_DATA_5: + case COMPUTE_USER_DATA_6: + case COMPUTE_USER_DATA_7: + case COMPUTE_USER_DATA_8: + case COMPUTE_USER_DATA_9: + case COMPUTE_USER_DATA_10: + case COMPUTE_USER_DATA_11: + case COMPUTE_USER_DATA_12: + case COMPUTE_USER_DATA_13: + case COMPUTE_USER_DATA_14: + case COMPUTE_USER_DATA_15: + userComputeData[regId - COMPUTE_USER_DATA_0] = value; + break; + + case CB_BLEND0_CONTROL: { + blendColorSrc = (BlendMultiplier)fetchMaskedValue( + value, CB_BLEND0_CONTROL_COLOR_SRCBLEND_MASK); + blendColorFn = (BlendFunc)fetchMaskedValue( + value, CB_BLEND0_CONTROL_COLOR_COMB_FCN_MASK); + blendColorDst = (BlendMultiplier)fetchMaskedValue( + value, CB_BLEND0_CONTROL_COLOR_DESTBLEND_MASK); + auto opacity_weight = + fetchMaskedValue(value, CB_BLEND0_CONTROL_OPACITY_WEIGHT_MASK); + blendAlphaSrc = (BlendMultiplier)fetchMaskedValue( + value, CB_BLEND0_CONTROL_ALPHA_SRCBLEND_MASK); + blendAlphaFn = (BlendFunc)fetchMaskedValue( + value, CB_BLEND0_CONTROL_ALPHA_COMB_FCN_MASK); + blendAlphaDst = (BlendMultiplier)fetchMaskedValue( + value, CB_BLEND0_CONTROL_ALPHA_DESTBLEND_MASK); + blendSeparateAlpha = + fetchMaskedValue(value, + CB_BLEND0_CONTROL_SEPARATE_ALPHA_BLEND_MASK) != 0; + blendEnable = + fetchMaskedValue(value, CB_BLEND0_CONTROL_BLEND_ENABLE_MASK) != 0; + + std::printf(" * COLOR_SRCBLEND = %x\n", blendColorSrc); + std::printf(" * COLOR_COMB_FCN = %x\n", blendColorFn); + std::printf(" * COLOR_DESTBLEND = %x\n", blendColorDst); + std::printf(" * OPACITY_WEIGHT = %x\n", opacity_weight); + std::printf(" * ALPHA_SRCBLEND = %x\n", blendAlphaSrc); + std::printf(" * ALPHA_COMB_FCN = %x\n", blendAlphaFn); + std::printf(" * ALPHA_DESTBLEND = %x\n", blendAlphaDst); + std::printf(" * SEPARATE_ALPHA_BLEND = %x\n", blendSeparateAlpha); + std::printf(" * BLEND_ENABLE = %x\n", blendEnable); + break; + } + } + } }; -static CbColorFormat cbColorFormat = CbColorFormat::Normal; - -static CbRasterOp cbRasterOp = CbRasterOp::Copy; - -static std::uint32_t vgtPrimitiveType = 0; -static bool stencilEnable = false; -static bool depthEnable = false; -static bool depthWriteEnable = false; -static bool depthBoundsEnable = false; -static int zFunc = 0; -static bool backFaceEnable = false; -static int stencilFunc = 0; -static int stencilFuncBackFace = 0; - -static float depthClear = 1.f; - -static bool cullFront = false; -static bool cullBack = false; -static int face = 0; // 0 - CCW, 1 - CW -static bool polyMode = false; -static int polyModeFrontPType = 0; -static int polyModeBackPType = 0; -static bool polyOffsetFrontEnable = false; -static bool polyOffsetBackEnable = false; -static bool polyOffsetParaEnable = false; -static bool vtxWindowOffsetEnable = false; -static bool provokingVtxLast = false; -static bool erspCorrDis = false; -static bool multiPrimIbEna = false; - -static bool depthClearEnable = false; -static bool stencilClearEnable = false; -static bool depthCopy = false; -static bool stencilCopy = false; -static bool resummarizeEnable = false; -static bool stencilCompressDisable = false; -static bool depthCompressDisable = false; -static bool copyCentroid = false; -static int copySample = 0; -static bool zpassIncrementDisable = false; - -static std::uint64_t zReadBase = 0; -static std::uint64_t zWriteBase = 0; - -static BlendMultiplier blendColorSrc = {}; -static BlendFunc blendColorFn = {}; -static BlendMultiplier blendColorDst = {}; -static BlendMultiplier blendAlphaSrc = {}; -static BlendFunc blendAlphaFn = {}; -static BlendMultiplier blendAlphaDst = {}; -static bool blendSeparateAlpha = false; -static bool blendEnable = false; -static std::uint32_t cbRenderTargetMask = 0; - -static void setRegister(std::uint32_t regId, std::uint32_t value) { - switch (regId) { - case SPI_SHADER_PGM_LO_PS: - pgmPsAddress &= ~((1ull << 40) - 1); - pgmPsAddress |= static_cast(value) << 8; - break; - case SPI_SHADER_PGM_HI_PS: - pgmPsAddress &= (1ull << 40) - 1; - pgmPsAddress |= static_cast(value) << 40; - break; - case SPI_SHADER_PGM_LO_VS: - pgmVsAddress &= ~((1ull << 40) - 1); - pgmVsAddress |= static_cast(value) << 8; - break; - case SPI_SHADER_PGM_HI_VS: - pgmVsAddress &= (1ull << 40) - 1; - pgmVsAddress |= static_cast(value) << 40; - break; - - case SPI_SHADER_USER_DATA_VS_0: - case SPI_SHADER_USER_DATA_VS_1: - case SPI_SHADER_USER_DATA_VS_2: - case SPI_SHADER_USER_DATA_VS_3: - case SPI_SHADER_USER_DATA_VS_4: - case SPI_SHADER_USER_DATA_VS_5: - case SPI_SHADER_USER_DATA_VS_6: - case SPI_SHADER_USER_DATA_VS_7: - case SPI_SHADER_USER_DATA_VS_8: - case SPI_SHADER_USER_DATA_VS_9: - case SPI_SHADER_USER_DATA_VS_10: - case SPI_SHADER_USER_DATA_VS_11: - case SPI_SHADER_USER_DATA_VS_12: - case SPI_SHADER_USER_DATA_VS_13: - case SPI_SHADER_USER_DATA_VS_14: - case SPI_SHADER_USER_DATA_VS_15: - userVsData[regId - SPI_SHADER_USER_DATA_VS_0] = value; - break; - - case SPI_SHADER_USER_DATA_PS_0: - case SPI_SHADER_USER_DATA_PS_1: - case SPI_SHADER_USER_DATA_PS_2: - case SPI_SHADER_USER_DATA_PS_3: - case SPI_SHADER_USER_DATA_PS_4: - case SPI_SHADER_USER_DATA_PS_5: - case SPI_SHADER_USER_DATA_PS_6: - case SPI_SHADER_USER_DATA_PS_7: - case SPI_SHADER_USER_DATA_PS_8: - case SPI_SHADER_USER_DATA_PS_9: - case SPI_SHADER_USER_DATA_PS_10: - case SPI_SHADER_USER_DATA_PS_11: - case SPI_SHADER_USER_DATA_PS_12: - case SPI_SHADER_USER_DATA_PS_13: - case SPI_SHADER_USER_DATA_PS_14: - case SPI_SHADER_USER_DATA_PS_15: - userPsData[regId - SPI_SHADER_USER_DATA_PS_0] = value; - break; - - case SPI_SHADER_PGM_RSRC2_PS: - psUserSpgrs = (value >> 1) & 0x1f; - break; - - case SPI_SHADER_PGM_RSRC2_VS: - vsUserSpgrs = (value >> 1) & 0x1f; - break; - - case CB_COLOR0_BASE ... CB_COLOR6_DCC_BASE: { - auto buffer = (regId - CB_COLOR0_BASE) / (CB_COLOR1_BASE - CB_COLOR0_BASE); - auto index = (regId - CB_COLOR0_BASE) % (CB_COLOR1_BASE - CB_COLOR0_BASE); - colorBuffers[buffer].setRegister(index, value); - break; - } - - case DB_RENDER_CONTROL: - depthClearEnable = getBit(value, 0); - stencilClearEnable = getBit(value, 1); - depthCopy = getBit(value, 2); - stencilCopy = getBit(value, 3); - resummarizeEnable = getBit(value, 4); - stencilCompressDisable = getBit(value, 5); - depthCompressDisable = getBit(value, 6); - copyCentroid = getBit(value, 7); - copySample = getBits(value, 10, 8); - zpassIncrementDisable = getBit(value, 11); - break; - - case DB_Z_READ_BASE: - zReadBase = static_cast(value) << 8; - break; - - case DB_Z_WRITE_BASE: - zWriteBase = static_cast(value) << 8; - break; - - case DB_DEPTH_CLEAR: - depthClear = std::bit_cast(value); - break; - - case DB_DEPTH_CONTROL: - stencilEnable = getBit(value, 0) != 0; - depthEnable = getBit(value, 1) != 0; - depthWriteEnable = getBit(value, 2) != 0; - depthBoundsEnable = getBit(value, 3) != 0; - zFunc = getBits(value, 6, 4); - backFaceEnable = getBit(value, 7); - stencilFunc = getBits(value, 11, 8); - stencilFuncBackFace = getBits(value, 23, 20); - - std::printf("stencilEnable=%u, depthEnable=%u, depthWriteEnable=%u, " - "depthBoundsEnable=%u, zFunc=%u, backFaceEnable=%u, " - "stencilFunc=%u, stencilFuncBackFace=%u\n", - stencilEnable, depthEnable, depthWriteEnable, depthBoundsEnable, - zFunc, backFaceEnable, stencilFunc, stencilFuncBackFace); - break; - - case CB_TARGET_MASK: { - cbRenderTargetMask = value; - break; - } - - case CB_COLOR_CONTROL: { - /* - If true, then each UNORM format COLOR_8_8_8_8 - MRT is treated as an SRGB format instead. This affects - both normal draw and resolve. This bit exists for - compatibility with older architectures that did not have - an SRGB number type. - */ - auto degammaEnable = getBits(value, 3, 0); - - /* - This field selects standard color processing or one of - several major operation modes. - - POSSIBLE VALUES: - 00 - CB_DISABLE: Disables drawing to color - buffer. Causes DB to not send tiles/quads to CB. CB - itself ignores this field. - 01 - CB_NORMAL: Normal rendering mode. DB - should send tiles and quads for pixel exports or just - quads for compute exports. - 02 - CB_ELIMINATE_FAST_CLEAR: Fill fast - cleared color surface locations with clear color. DB - should send only tiles. - 03 - CB_RESOLVE: Read from MRT0, average all - samples, and write to MRT1, which is one-sample. DB - should send only tiles. - 04 - CB_DECOMPRESS: Decompress MRT0 to a - uncompressed color format. This is required before a - multisampled surface is accessed by the CPU, or used as - a texture. This also decompresses the FMASK buffer. A - CB_ELIMINATE_FAST_CLEAR pass before this is - unnecessary. DB should send tiles and quads. - 05 - CB_FMASK_DECOMPRESS: Decompress the - FMASK buffer into a texture readable format. A - CB_ELIMINATE_FAST_CLEAR pass before this is - unnecessary. DB should send only tiles. - */ - auto mode = getBits(value, 6, 4); - - /* - This field supports the 28 boolean ops that combine - either source and dest or brush and dest, with brush - provided by the shader in place of source. The code - 0xCC (11001100) copies the source to the destination, - which disables the ROP function. ROP must be disabled - if any MRT enables blending. - - POSSIBLE VALUES: - 00 - 0x00: BLACKNESS - 05 - 0x05 - 10 - 0x0A - 15 - 0x0F - 17 - 0x11: NOTSRCERASE - 34 - 0x22 - 51 - 0x33: NOTSRCCOPY - 68 - 0x44: SRCERASE - 80 - 0x50 - 85 - 0x55: DSTINVERT - 90 - 0x5A: PATINVERT - 95 - 0x5F - 102 - 0x66: SRCINVERT - 119 - 0x77 - 136 - 0x88: SRCAND - 153 - 0x99 - 160 - 0xA0 - 165 - 0xA5 - 170 - 0xAA - 175 - 0xAF - 187 - 0xBB: MERGEPAINT - 204 - 0xCC: SRCCOPY - 221 - 0xDD - 238 - 0xEE: SRCPAINT - 240 - 0xF0: PATCOPY - 245 - 0xF5 - 250 - 0xFA - 255 - 0xFF: WHITENESS - */ - auto rop3 = getBits(value, 23, 16); - - std::printf(" * degammaEnable = %x\n", degammaEnable); - std::printf(" * mode = %x\n", mode); - std::printf(" * rop3 = %x\n", rop3); - - cbColorFormat = static_cast(mode); - cbRasterOp = static_cast(rop3); - break; - } - - case PA_CL_CLIP_CNTL: - cullFront = getBit(value, 0); - cullBack = getBit(value, 1); - face = getBit(value, 2); - polyMode = getBits(value, 4, 3); - polyModeFrontPType = getBits(value, 7, 5); - polyModeBackPType = getBits(value, 10, 8); - polyOffsetFrontEnable = getBit(value, 11); - polyOffsetBackEnable = getBit(value, 12); - polyOffsetParaEnable = getBit(value, 13); - vtxWindowOffsetEnable = getBit(value, 16); - provokingVtxLast = getBit(value, 19); - erspCorrDis = getBit(value, 20); - multiPrimIbEna = getBit(value, 21); - break; - - case PA_SC_SCREEN_SCISSOR_TL: - screenScissorX = static_cast(value); - screenScissorY = static_cast(value >> 16); - break; - - case PA_SC_SCREEN_SCISSOR_BR: - screenScissorW = static_cast(value) - screenScissorX; - screenScissorH = static_cast(value >> 16) - screenScissorY; - break; - - case VGT_PRIMITIVE_TYPE: - vgtPrimitiveType = value; - break; - - case COMPUTE_NUM_THREAD_X: - computeNumThreadX = value; - break; - - case COMPUTE_NUM_THREAD_Y: - computeNumThreadY = value; - break; - - case COMPUTE_NUM_THREAD_Z: - computeNumThreadZ = value; - break; - - case COMPUTE_PGM_LO: - pgmComputeAddress &= ~((1ull << 40) - 1); - pgmComputeAddress |= static_cast(value) << 8; - break; - - case COMPUTE_PGM_HI: - pgmComputeAddress &= (1ull << 40) - 1; - pgmComputeAddress |= static_cast(value) << 40; - break; - - case COMPUTE_PGM_RSRC1: - break; - case COMPUTE_PGM_RSRC2: - computeUserSpgrs = (value >> 1) & 0x1f; - break; - - case COMPUTE_USER_DATA_0: - case COMPUTE_USER_DATA_1: - case COMPUTE_USER_DATA_2: - case COMPUTE_USER_DATA_3: - case COMPUTE_USER_DATA_4: - case COMPUTE_USER_DATA_5: - case COMPUTE_USER_DATA_6: - case COMPUTE_USER_DATA_7: - case COMPUTE_USER_DATA_8: - case COMPUTE_USER_DATA_9: - case COMPUTE_USER_DATA_10: - case COMPUTE_USER_DATA_11: - case COMPUTE_USER_DATA_12: - case COMPUTE_USER_DATA_13: - case COMPUTE_USER_DATA_14: - case COMPUTE_USER_DATA_15: - userComputeData[regId - COMPUTE_USER_DATA_0] = value; - break; - - case CB_BLEND0_CONTROL: { - blendColorSrc = (BlendMultiplier)fetchMaskedValue( - value, CB_BLEND0_CONTROL_COLOR_SRCBLEND_MASK); - blendColorFn = (BlendFunc)fetchMaskedValue( - value, CB_BLEND0_CONTROL_COLOR_COMB_FCN_MASK); - blendColorDst = (BlendMultiplier)fetchMaskedValue( - value, CB_BLEND0_CONTROL_COLOR_DESTBLEND_MASK); - auto opacity_weight = - fetchMaskedValue(value, CB_BLEND0_CONTROL_OPACITY_WEIGHT_MASK); - blendAlphaSrc = (BlendMultiplier)fetchMaskedValue( - value, CB_BLEND0_CONTROL_ALPHA_SRCBLEND_MASK); - blendAlphaFn = (BlendFunc)fetchMaskedValue( - value, CB_BLEND0_CONTROL_ALPHA_COMB_FCN_MASK); - blendAlphaDst = (BlendMultiplier)fetchMaskedValue( - value, CB_BLEND0_CONTROL_ALPHA_DESTBLEND_MASK); - blendSeparateAlpha = - fetchMaskedValue(value, CB_BLEND0_CONTROL_SEPARATE_ALPHA_BLEND_MASK) != - 0; - blendEnable = - fetchMaskedValue(value, CB_BLEND0_CONTROL_BLEND_ENABLE_MASK) != 0; - - std::printf(" * COLOR_SRCBLEND = %x\n", blendColorSrc); - std::printf(" * COLOR_COMB_FCN = %x\n", blendColorFn); - std::printf(" * COLOR_DESTBLEND = %x\n", blendColorDst); - std::printf(" * OPACITY_WEIGHT = %x\n", opacity_weight); - std::printf(" * ALPHA_SRCBLEND = %x\n", blendAlphaSrc); - std::printf(" * ALPHA_COMB_FCN = %x\n", blendAlphaFn); - std::printf(" * ALPHA_DESTBLEND = %x\n", blendAlphaDst); - std::printf(" * SEPARATE_ALPHA_BLEND = %x\n", blendSeparateAlpha); - std::printf(" * BLEND_ENABLE = %x\n", blendEnable); - break; - } - } -} - void ShaderModule::destroy() const { if (descriptorPool) { vkDestroyDescriptorPool(vk::g_vkDevice, descriptorPool, nullptr); @@ -1125,8 +1129,8 @@ createFramebuffer(VkRenderPass renderPass, VkExtent2D extent, } static VkPipeline createGraphicsPipeline( - VkPipelineLayout pipelineLayout, VkRenderPass renderPass, - VkPipelineCache pipelineCache, + QueueRegisters ®s, VkPipelineLayout pipelineLayout, + VkRenderPass renderPass, VkPipelineCache pipelineCache, VkPipelineVertexInputStateCreateInfo vertexInputInfo, VkPrimitiveTopology topology, std::span shaders) { @@ -1147,11 +1151,11 @@ static VkPipeline createGraphicsPipeline( rasterizer.rasterizerDiscardEnable = VK_FALSE; rasterizer.polygonMode = VK_POLYGON_MODE_FILL; rasterizer.cullMode = - (false && cullBack ? VK_CULL_MODE_BACK_BIT : VK_CULL_MODE_NONE) | - (false && cullFront ? VK_CULL_MODE_FRONT_BIT : VK_CULL_MODE_NONE); + (false && regs.cullBack ? VK_CULL_MODE_BACK_BIT : VK_CULL_MODE_NONE) | + (false && regs.cullFront ? VK_CULL_MODE_FRONT_BIT : VK_CULL_MODE_NONE); rasterizer.frontFace = - face ? VK_FRONT_FACE_CLOCKWISE : VK_FRONT_FACE_COUNTER_CLOCKWISE; + regs.face ? VK_FRONT_FACE_CLOCKWISE : VK_FRONT_FACE_COUNTER_CLOCKWISE; rasterizer.depthBiasEnable = VK_FALSE; // rasterizer.depthBiasConstantFactor = 0; // rasterizer.depthBiasClamp = 0; @@ -1167,11 +1171,11 @@ static VkPipeline createGraphicsPipeline( VkPipelineDepthStencilStateCreateInfo depthStencil{}; depthStencil.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO; - depthStencil.depthTestEnable = depthEnable; - depthStencil.depthWriteEnable = depthWriteEnable; - depthStencil.depthCompareOp = (VkCompareOp)zFunc; - depthStencil.depthBoundsTestEnable = depthBoundsEnable; - // depthStencil.stencilTestEnable = stencilEnable; + depthStencil.depthTestEnable = regs.depthEnable; + depthStencil.depthWriteEnable = regs.depthWriteEnable; + depthStencil.depthCompareOp = (VkCompareOp)regs.zFunc; + depthStencil.depthBoundsTestEnable = regs.depthBoundsEnable; + // depthStencil.stencilTestEnable = regs.stencilEnable; // depthStencil.front; // depthStencil.back; depthStencil.minDepthBounds = 0.f; @@ -1179,19 +1183,19 @@ static VkPipeline createGraphicsPipeline( VkPipelineColorBlendAttachmentState colorBlendAttachment{}; - colorBlendAttachment.blendEnable = blendEnable; + colorBlendAttachment.blendEnable = regs.blendEnable; colorBlendAttachment.srcColorBlendFactor = - blendMultiplierToVkBlendFactor(blendColorSrc); + blendMultiplierToVkBlendFactor(regs.blendColorSrc); colorBlendAttachment.dstColorBlendFactor = - blendMultiplierToVkBlendFactor(blendColorDst); - colorBlendAttachment.colorBlendOp = blendFuncToVkBlendOp(blendColorFn); + blendMultiplierToVkBlendFactor(regs.blendColorDst); + colorBlendAttachment.colorBlendOp = blendFuncToVkBlendOp(regs.blendColorFn); - if (blendSeparateAlpha) { + if (regs.blendSeparateAlpha) { colorBlendAttachment.srcAlphaBlendFactor = - blendMultiplierToVkBlendFactor(blendAlphaSrc); + blendMultiplierToVkBlendFactor(regs.blendAlphaSrc); colorBlendAttachment.dstAlphaBlendFactor = - blendMultiplierToVkBlendFactor(blendAlphaDst); - colorBlendAttachment.alphaBlendOp = blendFuncToVkBlendOp(blendAlphaFn); + blendMultiplierToVkBlendFactor(regs.blendAlphaDst); + colorBlendAttachment.alphaBlendOp = blendFuncToVkBlendOp(regs.blendAlphaFn); } else { colorBlendAttachment.srcAlphaBlendFactor = colorBlendAttachment.srcColorBlendFactor; @@ -2215,6 +2219,7 @@ static void submitToQueue(VkQueue queue, VkCommandBuffer cmdBuffer, struct RenderState { DrawContext &ctxt; + QueueRegisters ®s; std::vector vertexBindings; std::vector vertexAttrs; std::vector descriptorSetLayoutBindings; @@ -2448,8 +2453,8 @@ struct RenderState { void resolve() { // TODO: when texture cache will be implemented it MSAA should be done by // GPU - auto srcBuffer = colorBuffers[0]; - auto dstBuffer = colorBuffers[1]; + auto srcBuffer = regs.colorBuffers[0]; + auto dstBuffer = regs.colorBuffers[1]; const auto src = g_hostMemory.getPointer(srcBuffer.base); auto dst = g_hostMemory.getPointer(dstBuffer.base); @@ -2458,30 +2463,30 @@ struct RenderState { return; } - std::memcpy(dst, src, screenScissorH * screenScissorW * 4); + std::memcpy(dst, src, regs.screenScissorH * regs.screenScissorW * 4); } void draw(std::uint32_t count, std::uint64_t indeciesAddress, std::uint32_t indexCount) { - if (cbColorFormat == CbColorFormat::Disable) { + if (regs.cbColorFormat == CbColorFormat::Disable) { return; } - if (cbColorFormat == CbColorFormat::EliminateFastClear) { + if (regs.cbColorFormat == CbColorFormat::EliminateFastClear) { eliminateFastClear(); return; } - if (cbColorFormat == CbColorFormat::Resolve) { + if (regs.cbColorFormat == CbColorFormat::Resolve) { resolve(); return; } - if (pgmVsAddress == 0 || pgmPsAddress == 0) { + if (regs.pgmVsAddress == 0 || regs.pgmPsAddress == 0) { return; } - if (cbRenderTargetMask == 0 || colorBuffers[0].base == 0) { + if (regs.cbRenderTargetMask == 0 || regs.colorBuffers[0].base == 0) { return; } @@ -2490,7 +2495,7 @@ struct RenderState { getHostVisibleMemory().clear(); getDeviceLocalMemory().clear(); - depthClearEnable = true; + // regs.depthClearEnable = true; vk::Semaphore sem = vk::Semaphore::Create(); @@ -2514,15 +2519,15 @@ struct RenderState { VkCommandBuffer readCommandBuffer = transferCommandBuffers[0]; Verify() << vkBeginCommandBuffer(readCommandBuffer, &beginInfo); - auto primType = static_cast(vgtPrimitiveType); + auto primType = static_cast(regs.vgtPrimitiveType); int bindingOffset = 0; auto vertexShader = - loadShader(readCommandBuffer, shader::Stage::Vertex, pgmVsAddress, - userVsData, vsUserSpgrs, bindingOffset); - auto fragmentShader = - loadShader(readCommandBuffer, shader::Stage::Fragment, pgmPsAddress, - userPsData, psUserSpgrs, bindingOffset); + loadShader(readCommandBuffer, shader::Stage::Vertex, regs.pgmVsAddress, + regs.userVsData, regs.vsUserSpgrs, bindingOffset); + auto fragmentShader = loadShader(readCommandBuffer, shader::Stage::Fragment, + regs.pgmPsAddress, regs.userPsData, + regs.psUserSpgrs, bindingOffset); auto depthFormat = VK_FORMAT_D32_SFLOAT_S8_UINT; // TODO @@ -2532,8 +2537,8 @@ struct RenderState { std::vector colorAttachments; VkAttachmentReference depthAttachment; - for (auto targetMask = cbRenderTargetMask; - auto &colorBuffer : colorBuffers) { + for (auto targetMask = regs.cbRenderTargetMask; + auto &colorBuffer : regs.colorBuffers) { if (targetMask == 0 || colorBuffer.base == 0) { break; } @@ -2549,11 +2554,12 @@ struct RenderState { surfaceFormatToVkFormat((SurfaceFormat)colorBuffer.format, TextureChannelType::kTextureChannelTypeSrgb); - auto colorImageHandle = vk::Image2D::Allocate( - getDeviceLocalMemory(), screenScissorW, screenScissorH, format, - VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | - VK_IMAGE_USAGE_TRANSFER_SRC_BIT | - VK_IMAGE_USAGE_TRANSFER_DST_BIT); + auto colorImageHandle = + vk::Image2D::Allocate(getDeviceLocalMemory(), regs.screenScissorW, + regs.screenScissorH, format, + VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | + VK_IMAGE_USAGE_TRANSFER_SRC_BIT | + VK_IMAGE_USAGE_TRANSFER_DST_BIT); auto colorImage = vk::ImageRef(colorImageHandle); colorImages.push_back(std::move(colorImageHandle)); @@ -2591,20 +2597,21 @@ struct RenderState { }); } - auto depthImageHandle = vk::Image2D::Allocate( - getDeviceLocalMemory(), screenScissorW, screenScissorH, depthFormat, - VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | - VK_IMAGE_USAGE_TRANSFER_SRC_BIT | - (depthClearEnable || zReadBase == 0 - ? 0 - : VK_IMAGE_USAGE_TRANSFER_DST_BIT)); + auto depthImageHandle = + vk::Image2D::Allocate(getDeviceLocalMemory(), regs.screenScissorW, + regs.screenScissorH, depthFormat, + VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | + VK_IMAGE_USAGE_TRANSFER_SRC_BIT | + (regs.depthClearEnable || regs.zReadBase == 0 + ? 0 + : VK_IMAGE_USAGE_TRANSFER_DST_BIT)); auto depthImage = vk::ImageRef(depthImageHandle); { - if (!depthClearEnable && zReadBase) { + if (!regs.depthClearEnable && regs.zReadBase) { buffers.push_back( depthImage.read(readCommandBuffer, getHostVisibleMemory(), - g_hostMemory.getPointer(zReadBase), + g_hostMemory.getPointer(regs.zReadBase), kTileModeDisplay_LinearAligned, // TODO VK_IMAGE_ASPECT_DEPTH_BIT, 4)); } @@ -2617,14 +2624,14 @@ struct RenderState { attachments.push_back({ .format = depthFormat, .samples = VK_SAMPLE_COUNT_1_BIT, - .loadOp = !depthClearEnable && zReadBase + .loadOp = !regs.depthClearEnable && regs.zReadBase ? VK_ATTACHMENT_LOAD_OP_LOAD : VK_ATTACHMENT_LOAD_OP_CLEAR, - .storeOp = depthWriteEnable && zWriteBase + .storeOp = regs.depthWriteEnable && regs.zWriteBase ? VK_ATTACHMENT_STORE_OP_STORE : VK_ATTACHMENT_STORE_OP_DONT_CARE, - .stencilLoadOp = stencilClearEnable ? VK_ATTACHMENT_LOAD_OP_CLEAR - : VK_ATTACHMENT_LOAD_OP_LOAD, + .stencilLoadOp = regs.stencilClearEnable ? VK_ATTACHMENT_LOAD_OP_CLEAR + : VK_ATTACHMENT_LOAD_OP_LOAD, .stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE, .initialLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, .finalLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, @@ -2703,7 +2710,8 @@ struct RenderState { framebufferAttachments.push_back(depthImageView); auto framebuffer = createFramebuffer( - renderPass, {screenScissorW, screenScissorH}, framebufferAttachments); + renderPass, {regs.screenScissorW, regs.screenScissorH}, + framebufferAttachments); ShaderModule shader{}; @@ -2738,7 +2746,7 @@ struct RenderState { VK_SHADER_STAGE_FRAGMENT_BIT)); shader.pipeline = createGraphicsPipeline( - shader.pipelineLayout, renderPass, ctxt.pipelineCache, + regs, shader.pipelineLayout, renderPass, ctxt.pipelineCache, createPipelineVertexInputState(vertexBindings, vertexAttrs), getVkPrimitiveType(primType), shaders); @@ -2758,13 +2766,14 @@ struct RenderState { VkClearValue clearValues[2]; clearValues[0].color = {{1.f, 1.f, 1.f, 1.0f}}; - clearValues[1].depthStencil = {depthClear, 0}; + clearValues[1].depthStencil = {regs.depthClear, 0}; VkRenderPassBeginInfo renderPassBeginInfo{}; renderPassBeginInfo.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO; renderPassBeginInfo.renderPass = renderPass; renderPassBeginInfo.framebuffer = framebuffer; - renderPassBeginInfo.renderArea.extent = {screenScissorW, screenScissorH}; + renderPassBeginInfo.renderArea.extent = {regs.screenScissorW, + regs.screenScissorH}; renderPassBeginInfo.clearValueCount = 2; renderPassBeginInfo.pClearValues = clearValues; @@ -2775,19 +2784,19 @@ struct RenderState { shader.pipeline); VkViewport viewport{}; - viewport.x = screenScissorX; - viewport.y = (float)screenScissorH - screenScissorY; - viewport.width = screenScissorW; - viewport.height = -(float)screenScissorH; + viewport.x = regs.screenScissorX; + viewport.y = (float)regs.screenScissorH - regs.screenScissorY; + viewport.width = regs.screenScissorW; + viewport.height = -(float)regs.screenScissorH; viewport.minDepth = 0.0f; viewport.maxDepth = 1.0f; vkCmdSetViewport(drawCommandBuffer, 0, 1, &viewport); VkRect2D scissor{}; - scissor.extent.width = screenScissorW; - scissor.extent.height = screenScissorH; - scissor.offset.x = screenScissorX; - scissor.offset.y = screenScissorY; + scissor.extent.width = regs.screenScissorW; + scissor.extent.height = regs.screenScissorH; + scissor.offset.x = regs.screenScissorX; + scissor.offset.y = regs.screenScissorY; vkCmdSetScissor(drawCommandBuffer, 0, 1, &scissor); vkCmdBindDescriptorSets(drawCommandBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, @@ -2797,8 +2806,9 @@ struct RenderState { vk::Buffer indexBufferStorage; BufferRef indexBuffer; auto needConversion = isPrimRequiresConversion(primType); - VkIndexType vkIndexType = - (indexType & 0x1f) == 0 ? VK_INDEX_TYPE_UINT16 : VK_INDEX_TYPE_UINT32; + VkIndexType vkIndexType = (regs.indexType & 0x1f) == 0 + ? VK_INDEX_TYPE_UINT16 + : VK_INDEX_TYPE_UINT32; if (needConversion) { auto indecies = g_hostMemory.getPointer(indeciesAddress); @@ -2901,7 +2911,7 @@ struct RenderState { } vk::Buffer resultDepthBuffer; - if (depthWriteEnable && zWriteBase != 0) { + if (regs.depthWriteEnable && regs.zWriteBase != 0) { resultDepthBuffer = depthImage.writeToBuffer(writeCommandBuffer, getHostVisibleMemory(), VK_IMAGE_ASPECT_DEPTH_BIT); @@ -2915,22 +2925,22 @@ struct RenderState { // TODO: make image read/write on gpu side for (std::size_t i = 0, end = resultColorBuffers.size(); i < end; ++i) { - auto &colorBuffer = colorBuffers[i]; + auto &colorBuffer = regs.colorBuffers[i]; resultColorBuffers[i].writeAsImageTo( g_hostMemory.getPointer(colorBuffer.base), getBitWidthOfSurfaceFormat((SurfaceFormat)colorBuffer.format) / 8, - (TileMode)colorBuffer.tileModeIndex, screenScissorW, screenScissorH, - 1, screenScissorW); + (TileMode)colorBuffer.tileModeIndex, regs.screenScissorW, + regs.screenScissorH, 1, regs.screenScissorW); std::printf("Writing color to %lx\n", colorBuffer.base); } - if (depthWriteEnable && zWriteBase != 0) { - resultDepthBuffer.writeAsImageTo(g_hostMemory.getPointer(zWriteBase), 4, - kTileModeDisplay_LinearAligned, // TODO - screenScissorW, screenScissorH, 1, - screenScissorW); + if (regs.depthWriteEnable && regs.zWriteBase != 0) { + resultDepthBuffer.writeAsImageTo( + g_hostMemory.getPointer(regs.zWriteBase), 4, + kTileModeDisplay_LinearAligned, // TODO + regs.screenScissorW, regs.screenScissorH, 1, regs.screenScissorW); } shader.destroy(); @@ -2964,10 +2974,10 @@ struct RenderState { vkAllocateCommandBuffers(vk::g_vkDevice, &allocInfo, &commandBuffer); vkBeginCommandBuffer(commandBuffer, &beginInfo); - auto computeShader = - loadShader(commandBuffer, shader::Stage::Compute, pgmComputeAddress, - userComputeData, computeUserSpgrs, bindingOffset, - computeNumThreadX, computeNumThreadY, computeNumThreadZ); + auto computeShader = loadShader( + commandBuffer, shader::Stage::Compute, regs.pgmComputeAddress, + regs.userComputeData, regs.computeUserSpgrs, bindingOffset, + regs.computeNumThreadX, regs.computeNumThreadY, regs.computeNumThreadZ); ShaderModule shader{}; shader.descriptorSetLayout = @@ -3011,15 +3021,16 @@ struct RenderState { } }; -static void draw(DrawContext &ctxt, std::uint32_t count, +static void draw(DrawContext &ctxt, QueueRegisters ®s, std::uint32_t count, std::uint64_t indeciesAddress, std::uint32_t indexCount) { - RenderState{.ctxt = ctxt}.draw(count, indeciesAddress, indexCount); + RenderState{.ctxt = ctxt, .regs = regs}.draw(count, indeciesAddress, + indexCount); } -static void dispatch(DrawContext &ctxt, std::size_t dimX, std::size_t dimY, - std::size_t dimZ) { +static void dispatch(DrawContext &ctxt, QueueRegisters ®s, std::size_t dimX, + std::size_t dimY, std::size_t dimZ) { - RenderState{.ctxt = ctxt}.dispatch(dimX, dimY, dimZ); + RenderState{.ctxt = ctxt, .regs = regs}.dispatch(dimX, dimY, dimZ); } enum class EventWriteSource : std::uint8_t { @@ -3071,17 +3082,18 @@ static void writeEop(EopData data) { } static void drawIndexAuto(amdgpu::device::DrawContext &ctxt, - std::uint32_t count) { - draw(ctxt, count, 0, 0); + QueueRegisters ®s, std::uint32_t count) { + draw(ctxt, regs, count, 0, 0); } -static void drawIndex2(amdgpu::device::DrawContext &ctxt, std::uint32_t maxSize, - std::uint64_t address, std::uint32_t count) { - draw(ctxt, count, address, maxSize); +static void drawIndex2(amdgpu::device::DrawContext &ctxt, QueueRegisters ®s, + std::uint32_t maxSize, std::uint64_t address, + std::uint32_t count) { + draw(ctxt, regs, count, address, maxSize); } -static void handleCommandBuffer(DrawContext &ctxt, std::uint32_t *cmds, - std::uint32_t count) { +static void handleCommandBuffer(DrawContext &ctxt, QueueRegisters ®s, + std::uint32_t *cmds, std::uint32_t count) { bool log = true; for (std::uint32_t cmdOffset = 0; cmdOffset < count; ++cmdOffset) { auto cmd = cmds[cmdOffset]; @@ -3141,8 +3153,8 @@ static void handleCommandBuffer(DrawContext &ctxt, std::uint32_t *cmds, cmds[cmdOffset + 2 + regOffset]); } - setRegister(baseRegOffset + regOffset, - cmds[cmdOffset + 2 + regOffset]); + regs.setRegister(baseRegOffset + regOffset, + cmds[cmdOffset + 2 + regOffset]); } break; } @@ -3158,8 +3170,8 @@ static void handleCommandBuffer(DrawContext &ctxt, std::uint32_t *cmds, registerToString(baseRegOffset + regOffset).c_str(), cmds[cmdOffset + 2 + regOffset]); } - setRegister(baseRegOffset + regOffset, - cmds[cmdOffset + 2 + regOffset]); + regs.setRegister(baseRegOffset + regOffset, + cmds[cmdOffset + 2 + regOffset]); } break; } @@ -3176,8 +3188,8 @@ static void handleCommandBuffer(DrawContext &ctxt, std::uint32_t *cmds, cmds[cmdOffset + 2 + regOffset]); } - setRegister(baseRegOffset + regOffset, - cmds[cmdOffset + 2 + regOffset]); + regs.setRegister(baseRegOffset + regOffset, + cmds[cmdOffset + 2 + regOffset]); } break; } @@ -3246,12 +3258,12 @@ static void handleCommandBuffer(DrawContext &ctxt, std::uint32_t *cmds, } case kOpcodeINDEX_TYPE: { - indexType = cmds[cmdOffset + 1]; + regs.indexType = cmds[cmdOffset + 1]; break; } case kOpcodeDRAW_INDEX_AUTO: { - drawIndexAuto(ctxt, cmds[cmdOffset + 1]); + drawIndexAuto(ctxt, regs, cmds[cmdOffset + 1]); break; } @@ -3260,7 +3272,7 @@ static void handleCommandBuffer(DrawContext &ctxt, std::uint32_t *cmds, auto address = cmds[cmdOffset + 2] | (static_cast(cmds[cmdOffset + 3]) << 32); auto count = cmds[cmdOffset + 4]; - drawIndex2(ctxt, maxSize, address, count); + drawIndex2(ctxt, regs, maxSize, address, count); break; } @@ -3273,7 +3285,7 @@ static void handleCommandBuffer(DrawContext &ctxt, std::uint32_t *cmds, std::printf(" %04x: DIM Y=%u\n", cmdOffset + 2, dimY); std::printf(" %04x: DIM Z=%u\n", cmdOffset + 3, dimZ); } - dispatch(ctxt, dimX, dimY, dimZ); + dispatch(ctxt, regs, dimX, dimY, dimZ); } case kOpcodeEVENT_WRITE_EOP: { @@ -3425,14 +3437,18 @@ void amdgpu::device::AmdgpuDevice::handleProtectMemory(std::uint64_t address, std::printf("Unmapped area at %zx, size %lx\n", address, size); } } -void amdgpu::device::AmdgpuDevice::handleCommandBuffer(std::uint64_t address, + +static std::unordered_map queueRegs; + +void amdgpu::device::AmdgpuDevice::handleCommandBuffer(std::uint64_t queueId, + std::uint64_t address, std::uint64_t size) { auto count = size / sizeof(std::uint32_t); std::printf("address = %lx, count = %lx\n", address, count); - ::handleCommandBuffer(dc, g_hostMemory.getPointer(address), - count); + ::handleCommandBuffer(dc, queueRegs[queueId], + g_hostMemory.getPointer(address), count); } bool amdgpu::device::AmdgpuDevice::handleFlip( diff --git a/rpcsx-gpu/main.cpp b/rpcsx-gpu/main.cpp index ec411153d..ae2a916db 100644 --- a/rpcsx-gpu/main.cpp +++ b/rpcsx-gpu/main.cpp @@ -763,7 +763,8 @@ int main(int argc, const char *argv[]) { cmd.memoryProt.prot); break; case amdgpu::bridge::CommandId::CommandBuffer: - device.handleCommandBuffer(cmd.commandBuffer.address, + device.handleCommandBuffer(cmd.commandBuffer.queue, + cmd.commandBuffer.address, cmd.commandBuffer.size); break; case amdgpu::bridge::CommandId::Flip: {