From 0287d42aa51232b7922cb939c319c9692b7913c6 Mon Sep 17 00:00:00 2001 From: DH Date: Sun, 20 Oct 2024 23:32:59 +0300 Subject: [PATCH] gpu: implement image buffer --- rpcsx/gpu/Cache.cpp | 899 ++++++++++++++------- rpcsx/gpu/Cache.hpp | 56 +- rpcsx/gpu/Device.cpp | 12 +- rpcsx/gpu/FlipPipeline.cpp | 10 +- rpcsx/gpu/Renderer.cpp | 23 +- rpcsx/gpu/lib/gcn-shader/shaders/rdna.glsl | 174 ++-- 6 files changed, 826 insertions(+), 348 deletions(-) diff --git a/rpcsx/gpu/Cache.cpp b/rpcsx/gpu/Cache.cpp index 3c000f541..4692cb54c 100644 --- a/rpcsx/gpu/Cache.cpp +++ b/rpcsx/gpu/Cache.cpp @@ -2,6 +2,7 @@ #include "Device.hpp" #include "amdgpu/tiler.hpp" #include "gnm/vulkan.hpp" +#include "rx/hexdump.hpp" #include "rx/mem.hpp" #include "shader/Evaluator.hpp" #include "shader/GcnConverter.hpp" @@ -16,6 +17,7 @@ #include #include #include +#include #include #include @@ -209,14 +211,14 @@ void Cache::ShaderResources::loadResources( rx::die("failed to evaluate 128 bit T#"); } - gnm::TBuffer buffer{}; - std::memcpy(reinterpret_cast(&buffer), &*word0, + gnm::TBuffer tbuffer{}; + std::memcpy(reinterpret_cast(&tbuffer), &*word0, sizeof(std::uint32_t)); - std::memcpy(reinterpret_cast(&buffer) + 1, &*word1, + std::memcpy(reinterpret_cast(&tbuffer) + 1, &*word1, sizeof(std::uint32_t)); - std::memcpy(reinterpret_cast(&buffer) + 2, &*word2, + std::memcpy(reinterpret_cast(&tbuffer) + 2, &*word2, sizeof(std::uint32_t)); - std::memcpy(reinterpret_cast(&buffer) + 3, &*word3, + std::memcpy(reinterpret_cast(&tbuffer) + 3, &*word3, sizeof(std::uint32_t)); if (texture.words[4] != nullptr) { @@ -230,19 +232,19 @@ void Cache::ShaderResources::loadResources( rx::die("failed to evaluate 256 bit T#"); } - std::memcpy(reinterpret_cast(&buffer) + 4, &*word4, + std::memcpy(reinterpret_cast(&tbuffer) + 4, &*word4, sizeof(std::uint32_t)); - std::memcpy(reinterpret_cast(&buffer) + 5, &*word5, + std::memcpy(reinterpret_cast(&tbuffer) + 5, &*word5, sizeof(std::uint32_t)); - std::memcpy(reinterpret_cast(&buffer) + 6, &*word6, + std::memcpy(reinterpret_cast(&tbuffer) + 6, &*word6, sizeof(std::uint32_t)); - std::memcpy(reinterpret_cast(&buffer) + 7, &*word7, + std::memcpy(reinterpret_cast(&tbuffer) + 7, &*word7, sizeof(std::uint32_t)); } std::vector *resources = nullptr; - switch (buffer.type) { + switch (tbuffer.type) { case gnm::TextureType::Array1D: case gnm::TextureType::Dim1D: resources = &imageResources[0]; @@ -261,11 +263,11 @@ void Cache::ShaderResources::loadResources( rx::dieIf(resources == nullptr, "ShaderResources: unexpected texture type %u", - static_cast(buffer.type)); + static_cast(tbuffer.type)); slotResources[slotOffset + texture.resourceSlot] = resources->size(); resources->push_back(cacheTag->getImageView( - amdgpu::ImageKey::createFrom(buffer), texture.access)); + amdgpu::ImageViewKey::createFrom(tbuffer), texture.access)); } for (auto &sampler : res.samplers) { @@ -548,24 +550,27 @@ struct Cache::Entry { acquiredTag = tag->mStorage; } - void release(Cache::Tag *tag) { + bool release(Cache::Tag *tag) { if (acquiredTag != tag->mStorage) { - return; + return false; } auto access = acquiredAccess.load(std::memory_order::relaxed); + bool hasSubmits = false; if ((access & Access::Write) == Access::Write) { tagId = tag->getWriteId(); - release(tag, access); + hasSubmits = release(tag, access); } acquiredTag = nullptr; acquiredAccess.store(Access::None, std::memory_order::release); acquiredAccess.notify_one(); + + return hasSubmits; } - virtual void release(Cache::Tag *tag, Access access) {} + virtual bool release(Cache::Tag *tag, Access access) { return false; } }; struct CachedShader : Cache::Entry { @@ -613,9 +618,9 @@ struct CachedHostVisibleBuffer : CachedBuffer { return !kDisableCache && addressRange.size() >= rx::mem::pageSize; } - void flush(void *target, rx::AddressRange range) { + bool flush(void *target, rx::AddressRange range) { if (!hasDelayedFlush) { - return; + return false; } hasDelayedFlush = false; @@ -623,6 +628,8 @@ struct CachedHostVisibleBuffer : CachedBuffer { auto data = buffer.getData() + range.beginAddress() - addressRange.beginAddress(); std::memcpy(target, data, range.size()); + + return false; } void update(rx::AddressRange range, void *from) { @@ -631,20 +638,22 @@ struct CachedHostVisibleBuffer : CachedBuffer { std::memcpy(data, from, range.size()); } - void release(Cache::Tag *tag, Access) override { + bool release(Cache::Tag *tag, Access) override { if (addressRange.beginAddress() == 0) { - return; + return false; } auto locked = expensive(); tag->getCache()->trackWrite(addressRange, tagId, locked); hasDelayedFlush = true; - if (!locked) { - auto address = - RemoteMemory{tag->getVmId()}.getPointer(addressRange.beginAddress()); - flush(address, addressRange); + if (locked) { + return false; } + + auto address = + RemoteMemory{tag->getVmId()}.getPointer(addressRange.beginAddress()); + return flush(address, addressRange); } }; @@ -668,16 +677,200 @@ constexpr VkImageAspectFlags toAspect(ImageKind kind) { return VK_IMAGE_ASPECT_NONE; } -struct CachedImage : Cache::Entry { - vk::Image image; +struct CachedImageBuffer : Cache::Entry { + vk::Buffer buffer; GpuTiler *tiler; - ImageKind kind; TileMode tileMode{}; gnm::DataFormat dfmt{}; std::uint32_t pitch{}; SurfaceInfo info; + unsigned mipLevels = 1; + unsigned arrayLayers = 1; + unsigned width = 1; + unsigned height = 1; + unsigned depth = 1; - bool expensive() { return !kDisableCache; } + bool expensive() { + if (kDisableCache) { + return false; + } + + if (isLinear() && info.totalTiledSize < rx::mem::pageSize) { + return false; + } + + return true; + } + + [[nodiscard]] bool isLinear() const { + return tileMode.arrayMode() == kArrayModeLinearGeneral || + tileMode.arrayMode() == kArrayModeLinearAligned; + } + + [[nodiscard]] VkImageSubresourceRange + getSubresource(rx::AddressRange range) const { + auto offset = range.beginAddress() - addressRange.beginAddress(); + auto size = range.size(); + std::uint32_t firstMip = -1; + std::uint32_t lastMip = 0; + + for (std::uint32_t mipLevel = 0; mipLevel < mipLevels; ++mipLevel) { + auto &mipInfo = info.getSubresourceInfo(mipLevel); + if (mipInfo.tiledOffset > offset + size) { + break; + } + + if (mipInfo.tiledOffset + mipInfo.tiledSize * arrayLayers < offset) { + continue; + } + + firstMip = std::min(firstMip, mipLevel); + lastMip = std::max(lastMip, mipLevel); + } + + assert(firstMip <= lastMip); + + return { + .aspectMask = 0, + .baseMipLevel = firstMip, + .levelCount = lastMip - firstMip + 1, + .baseArrayLayer = 0, + .layerCount = arrayLayers, + }; + } + + [[nodiscard]] std::size_t getTiledSize() const { return info.totalTiledSize; } + [[nodiscard]] std::size_t getLinerSize() const { + return info.totalLinearSize; + } + + void update(Cache::Tag *tag, rx::AddressRange range, + Cache::Buffer tiledBuffer) { + auto subresource = getSubresource(range); + auto &sched = tag->getScheduler(); + + if (!isLinear()) { + auto linearAddress = buffer.getAddress(); + + for (unsigned mipLevel = subresource.baseMipLevel; + mipLevel < subresource.baseMipLevel + subresource.levelCount; + ++mipLevel) { + tiler->detile(sched, info, tileMode, tiledBuffer.deviceAddress, + info.totalTiledSize, linearAddress, info.totalLinearSize, + mipLevel, 0, info.arrayLayerCount); + } + return; + } + + std::vector regions; + regions.reserve(subresource.levelCount); + + for (unsigned mipLevel = subresource.baseMipLevel; + mipLevel < subresource.baseMipLevel + subresource.levelCount; + ++mipLevel) { + auto &mipInfo = info.getSubresourceInfo(mipLevel); + regions.push_back({ + .srcOffset = mipInfo.tiledOffset + tiledBuffer.offset, + .dstOffset = mipInfo.linearOffset, + .size = mipInfo.linearSize, + }); + } + + vkCmdCopyBuffer(sched.getCommandBuffer(), tiledBuffer.handle, + buffer.getHandle(), regions.size(), regions.data()); + } + + void write(Scheduler &scheduler, Cache::Buffer tiledBuffer, + const VkImageSubresourceRange &subresourceRange) { + if (!isLinear()) { + for (unsigned mipLevel = 0; mipLevel < subresourceRange.levelCount; + ++mipLevel) { + tiler->tile(scheduler, info, tileMode, buffer.getAddress(), + info.totalLinearSize, tiledBuffer.deviceAddress, + info.totalTiledSize, mipLevel, 0, + subresourceRange.levelCount); + } + + return; + } + + std::vector regions; + regions.reserve(subresourceRange.levelCount); + + for (unsigned mipLevelOffset = 0; + mipLevelOffset < subresourceRange.levelCount; ++mipLevelOffset) { + auto mipLevel = mipLevelOffset + subresourceRange.baseMipLevel; + auto &mipInfo = info.getSubresourceInfo(mipLevel); + + regions.push_back({ + .srcOffset = mipInfo.linearOffset, + .dstOffset = mipInfo.tiledOffset + tiledBuffer.offset, + .size = mipInfo.linearSize, + }); + } + + vkCmdCopyBuffer(scheduler.getCommandBuffer(), buffer.getHandle(), + tiledBuffer.handle, regions.size(), regions.data()); + } + + bool flush(Cache::Tag &tag, Scheduler &scheduler, rx::AddressRange range) { + if (!hasDelayedFlush) { + return false; + } + + hasDelayedFlush = false; + + auto subresourceRange = getSubresource(range); + auto beginOffset = + info.getSubresourceInfo(subresourceRange.baseMipLevel).tiledOffset; + auto lastLevelInfo = info.getSubresourceInfo( + subresourceRange.baseMipLevel + subresourceRange.levelCount - 1); + auto totalTiledSubresourceSize = + lastLevelInfo.tiledOffset + + lastLevelInfo.tiledSize * subresourceRange.layerCount; + + auto targetRange = rx::AddressRange::fromBeginSize( + range.beginAddress() + beginOffset, totalTiledSubresourceSize); + + auto tiledBuffer = tag.getBuffer(targetRange, Access::Write); + + write(scheduler, tiledBuffer, subresourceRange); + return true; + } + + bool release(Cache::Tag *tag, Access) override { + hasDelayedFlush = true; + auto locked = expensive(); + + for (auto &subresource : std::span(info.subresources, mipLevels)) { + auto subresourceRange = rx::AddressRange::fromBeginSize( + subresource.tiledOffset + addressRange.beginAddress(), + subresource.tiledSize); + + tag->getCache()->trackWrite(subresourceRange, tagId, locked); + } + + if (locked) { + return false; + } + + return flush(*tag, tag->getScheduler(), addressRange); + } +}; + +struct CachedImage : Cache::Entry { + vk::Image image; + ImageKind kind; + ImageBufferKey imageBufferKey; + SurfaceInfo info; + + bool expensive() { + if (kDisableCache) { + return false; + } + + return info.totalTiledSize >= rx::mem::pageSize; + } [[nodiscard]] VkImageSubresourceRange getSubresource(rx::AddressRange range) const { @@ -719,10 +912,7 @@ struct CachedImage : Cache::Entry { } void update(Cache::Tag *tag, rx::AddressRange range, - Cache::Buffer tiledBuffer) { - bool isLinear = tileMode.arrayMode() == kArrayModeLinearGeneral || - tileMode.arrayMode() == kArrayModeLinearAligned; - + Cache::ImageBuffer imageBuffer) { auto subresource = getSubresource(range); std::vector regions; @@ -730,75 +920,28 @@ struct CachedImage : Cache::Entry { auto &sched = tag->getScheduler(); - VkBuffer sourceBuffer; - if (isLinear) { - sourceBuffer = tiledBuffer.handle; - for (unsigned mipLevel = subresource.baseMipLevel; - mipLevel < subresource.baseMipLevel + subresource.levelCount; - ++mipLevel) { - auto &mipInfo = info.getSubresourceInfo(mipLevel); - regions.push_back({ - .bufferOffset = mipInfo.tiledOffset + tiledBuffer.offset, - .bufferRowLength = - mipLevel > 0 ? 0 : std::max(pitch >> mipLevel, 1u), - .imageSubresource = - { - .aspectMask = toAspect(kind), - .mipLevel = mipLevel, - .baseArrayLayer = subresource.baseArrayLayer, - .layerCount = subresource.layerCount, - }, - .imageExtent = - { - .width = std::max(image.getWidth() >> mipLevel, 1u), - .height = std::max(image.getHeight() >> mipLevel, 1u), - .depth = std::max(image.getDepth() >> mipLevel, 1u), - }, - }); - } - } else { - auto &tiler = tag->getDevice()->tiler; - - for (unsigned mipLevel = subresource.baseMipLevel; - mipLevel < subresource.baseMipLevel + subresource.levelCount; - ++mipLevel) { - auto &mipInfo = info.getSubresourceInfo(mipLevel); - - regions.push_back({ - .bufferOffset = mipInfo.linearOffset, - .imageSubresource = - { - .aspectMask = toAspect(kind), - .mipLevel = mipLevel, - .baseArrayLayer = subresource.baseArrayLayer, - .layerCount = subresource.layerCount, - }, - .imageExtent = - { - .width = std::max(image.getWidth() >> mipLevel, 1u), - .height = std::max(image.getHeight() >> mipLevel, 1u), - .depth = std::max(image.getDepth() >> mipLevel, 1u), - }, - }); - } - - auto detiledBuffer = - vk::Buffer::Allocate(vk::getDeviceLocalMemory(), info.totalLinearSize, - VK_BUFFER_USAGE_2_TRANSFER_DST_BIT_KHR | - VK_BUFFER_USAGE_2_TRANSFER_SRC_BIT_KHR); - - sourceBuffer = detiledBuffer.getHandle(); - auto linearAddress = detiledBuffer.getAddress(); - - sched.afterSubmit([detiledBuffer = std::move(detiledBuffer)] {}); - - for (unsigned mipLevel = subresource.baseMipLevel; - mipLevel < subresource.baseMipLevel + subresource.levelCount; - ++mipLevel) { - tiler.detile(sched, info, tileMode, tiledBuffer.deviceAddress, - info.totalTiledSize, linearAddress, info.totalLinearSize, - mipLevel, 0, info.arrayLayerCount); - } + for (unsigned mipLevel = subresource.baseMipLevel; + mipLevel < subresource.baseMipLevel + subresource.levelCount; + ++mipLevel) { + auto &mipInfo = info.getSubresourceInfo(mipLevel); + regions.push_back({ + .bufferOffset = mipInfo.tiledOffset + imageBuffer.offset, + .bufferRowLength = + mipLevel > 0 ? 0 : std::max(imageBufferKey.pitch >> mipLevel, 1u), + .imageSubresource = + { + .aspectMask = toAspect(kind), + .mipLevel = mipLevel, + .baseArrayLayer = subresource.baseArrayLayer, + .layerCount = subresource.layerCount, + }, + .imageExtent = + { + .width = std::max(image.getWidth() >> mipLevel, 1u), + .height = std::max(image.getHeight() >> mipLevel, 1u), + .depth = std::max(image.getDepth() >> mipLevel, 1u), + }, + }); } transitionImageLayout(sched.getCommandBuffer(), image, @@ -806,7 +949,7 @@ struct CachedImage : Cache::Entry { VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, subresource); vkCmdCopyBufferToImage( - sched.getCommandBuffer(), sourceBuffer, image.getHandle(), + sched.getCommandBuffer(), imageBuffer.handle, image.getHandle(), VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, regions.size(), regions.data()); transitionImageLayout(sched.getCommandBuffer(), image, @@ -814,129 +957,72 @@ struct CachedImage : Cache::Entry { VK_IMAGE_LAYOUT_GENERAL, subresource); } - void write(Scheduler &scheduler, Cache::Buffer tiledBuffer, - const VkImageSubresourceRange &subresourceRange) { + void write(Scheduler &scheduler, Cache::ImageBuffer imageBuffer, + rx::AddressRange range) { + auto subresourceRange = getSubresource(range); + std::vector regions; + regions.reserve(subresourceRange.levelCount); + + for (unsigned mipLevelOffset = 0; + mipLevelOffset < subresourceRange.levelCount; ++mipLevelOffset) { + auto mipLevel = mipLevelOffset + subresourceRange.baseMipLevel; + auto ®ionInfo = info.getSubresourceInfo(mipLevel); + + regions.push_back({ + .bufferOffset = imageBuffer.offset + regionInfo.linearOffset, + .bufferRowLength = + mipLevel > 0 ? 0 : std::max(info.pitch >> mipLevel, 1u), + .imageSubresource = + { + .aspectMask = toAspect(kind), + .mipLevel = mipLevel, + .baseArrayLayer = 0, + .layerCount = image.getArrayLayers(), + }, + .imageExtent = + { + .width = std::max(image.getWidth() >> mipLevel, 1u), + .height = std::max(image.getHeight() >> mipLevel, 1u), + .depth = std::max(image.getDepth() >> mipLevel, 1u), + }, + }); + } + transitionImageLayout( scheduler.getCommandBuffer(), image, VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, subresourceRange); - bool isLinear = tileMode.arrayMode() == kArrayModeLinearGeneral || - tileMode.arrayMode() == kArrayModeLinearAligned; - - std::vector regions; - regions.reserve(subresourceRange.levelCount); - - if (isLinear) { - for (unsigned mipLevelOffset = 0; - mipLevelOffset < subresourceRange.levelCount; ++mipLevelOffset) { - auto mipLevel = mipLevelOffset + subresourceRange.baseMipLevel; - auto ®ionInfo = info.getSubresourceInfo(mipLevel); - - regions.push_back({ - .bufferOffset = tiledBuffer.offset + regionInfo.tiledOffset, - .bufferRowLength = - mipLevel > 0 ? 0 : std::max(info.pitch >> mipLevel, 1u), - .imageSubresource = - { - .aspectMask = toAspect(kind), - .mipLevel = mipLevel, - .baseArrayLayer = 0, - .layerCount = image.getArrayLayers(), - }, - .imageExtent = - { - .width = std::max(image.getWidth() >> mipLevel, 1u), - .height = std::max(image.getHeight() >> mipLevel, 1u), - .depth = std::max(image.getDepth() >> mipLevel, 1u), - }, - }); - } - - vkCmdCopyImageToBuffer(scheduler.getCommandBuffer(), image.getHandle(), - VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, - tiledBuffer.handle, regions.size(), - regions.data()); - } else { - for (unsigned mipLevelOffset = 0; - mipLevelOffset < subresourceRange.levelCount; ++mipLevelOffset) { - auto mipLevel = mipLevelOffset + subresourceRange.baseMipLevel; - - auto ®ionInfo = info.getSubresourceInfo(mipLevel); - regions.push_back({ - .bufferOffset = tiledBuffer.offset + regionInfo.linearOffset, - .imageSubresource = - { - .aspectMask = toAspect(kind), - .mipLevel = mipLevel, - .baseArrayLayer = 0, - .layerCount = image.getArrayLayers(), - }, - .imageExtent = - { - .width = std::max(image.getWidth() >> mipLevel, 1u), - .height = std::max(image.getHeight() >> mipLevel, 1u), - .depth = std::max(image.getDepth() >> mipLevel, 1u), - }, - }); - } - - auto transferBuffer = vk::Buffer::Allocate( - vk::getDeviceLocalMemory(), info.totalLinearSize, - VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT); - - vkCmdCopyImageToBuffer(scheduler.getCommandBuffer(), image.getHandle(), - VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, - transferBuffer.getHandle(), regions.size(), - regions.data()); - - for (unsigned mipLevel = 0; mipLevel < image.getMipLevels(); ++mipLevel) { - tiler->tile(scheduler, info, tileMode, transferBuffer.getAddress(), - info.totalLinearSize, tiledBuffer.deviceAddress, - info.totalTiledSize, mipLevel, 0, image.getArrayLayers()); - } - - scheduler.afterSubmit([transferBuffer = std::move(transferBuffer)] {}); - } + vkCmdCopyImageToBuffer(scheduler.getCommandBuffer(), image.getHandle(), + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, + imageBuffer.handle, regions.size(), regions.data()); transitionImageLayout(scheduler.getCommandBuffer(), image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, VK_IMAGE_LAYOUT_GENERAL, subresourceRange); - - scheduler.submit(); } - void flush(Cache::Tag &tag, Scheduler &scheduler, rx::AddressRange range) { + bool flush(Cache::Tag &tag, Scheduler &scheduler, rx::AddressRange range) { if (!hasDelayedFlush) { - return; + return false; } hasDelayedFlush = false; - auto subresourceRange = getSubresource(range); - auto beginOffset = - info.getSubresourceInfo(subresourceRange.baseMipLevel).tiledOffset; - auto lastLevelInfo = info.getSubresourceInfo( - subresourceRange.baseMipLevel + subresourceRange.levelCount - 1); - auto totalTiledSubresourceSize = - lastLevelInfo.tiledOffset + - lastLevelInfo.tiledSize * subresourceRange.layerCount; - - auto targetRange = rx::AddressRange::fromBeginSize( - range.beginAddress() + beginOffset, totalTiledSubresourceSize); - - auto tiledBuffer = tag.getBuffer(targetRange, Access::Write); - - write(scheduler, tiledBuffer, subresourceRange); + auto imageBuffer = tag.getImageBuffer(imageBufferKey, Access::Write); + write(scheduler, imageBuffer, range); + return true; } - void release(Cache::Tag *tag, Access) override { + bool release(Cache::Tag *tag, Access) override { hasDelayedFlush = true; auto locked = expensive(); tag->getCache()->trackWrite(addressRange, tagId, locked); - if (!locked) { - flush(*tag, tag->getScheduler(), addressRange); + if (locked) { + return true; } + + return flush(*tag, tag->getScheduler(), addressRange); } }; @@ -944,27 +1030,111 @@ struct CachedImageView : Cache::Entry { vk::ImageView view; }; -ImageKey ImageKey::createFrom(const gnm::TBuffer &buffer) { +ImageViewKey ImageViewKey::createFrom(const gnm::TBuffer &tbuffer) { return { - .readAddress = buffer.address(), - .writeAddress = buffer.address(), - .type = buffer.type, - .dfmt = buffer.dfmt, - .nfmt = buffer.nfmt, - .tileMode = getDefaultTileModes()[buffer.tiling_idx], + .readAddress = tbuffer.address(), + .writeAddress = tbuffer.address(), + .type = tbuffer.type, + .dfmt = tbuffer.dfmt, + .nfmt = tbuffer.nfmt, + .tileMode = getDefaultTileModes()[tbuffer.tiling_idx], .extent = { - .width = buffer.width + 1u, - .height = buffer.height + 1u, - .depth = buffer.depth + 1u, + .width = tbuffer.width + 1u, + .height = tbuffer.height + 1u, + .depth = tbuffer.depth + 1u, }, - .pitch = buffer.pitch + 1u, - .baseMipLevel = static_cast(buffer.base_level), - .mipCount = buffer.last_level - buffer.base_level + 1u, - .baseArrayLayer = static_cast(buffer.base_array), - .arrayLayerCount = buffer.last_array - buffer.base_array + 1u, + .pitch = tbuffer.pitch + 1u, + .baseMipLevel = static_cast(tbuffer.base_level), + .mipCount = tbuffer.last_level - tbuffer.base_level + 1u, + .baseArrayLayer = static_cast(tbuffer.base_array), + .arrayLayerCount = tbuffer.last_array - tbuffer.base_array + 1u, .kind = ImageKind::Color, - .pow2pad = buffer.pow2pad != 0, + .pow2pad = tbuffer.pow2pad != 0, + .r = tbuffer.dst_sel_x, + .g = tbuffer.dst_sel_y, + .b = tbuffer.dst_sel_z, + .a = tbuffer.dst_sel_w, + }; +} + +ImageKey ImageKey::createFrom(const gnm::TBuffer &tbuffer) { + return { + .readAddress = tbuffer.address(), + .writeAddress = tbuffer.address(), + .type = tbuffer.type, + .dfmt = tbuffer.dfmt, + .nfmt = tbuffer.nfmt, + .tileMode = getDefaultTileModes()[tbuffer.tiling_idx], + .extent = + { + .width = tbuffer.width + 1u, + .height = tbuffer.height + 1u, + .depth = tbuffer.depth + 1u, + }, + .pitch = tbuffer.pitch + 1u, + .baseMipLevel = static_cast(tbuffer.base_level), + .mipCount = tbuffer.last_level - tbuffer.base_level + 1u, + .baseArrayLayer = static_cast(tbuffer.base_array), + .arrayLayerCount = tbuffer.last_array - tbuffer.base_array + 1u, + .kind = ImageKind::Color, + .pow2pad = tbuffer.pow2pad != 0, + }; +} + +ImageKey ImageKey::createFrom(const ImageViewKey &imageView) { + return { + .readAddress = imageView.readAddress, + .writeAddress = imageView.writeAddress, + .type = imageView.type, + .dfmt = imageView.dfmt, + .nfmt = imageView.nfmt, + .tileMode = imageView.tileMode, + .extent = imageView.extent, + .pitch = imageView.pitch, + .baseMipLevel = imageView.baseMipLevel, + .mipCount = imageView.mipCount, + .baseArrayLayer = imageView.baseArrayLayer, + .arrayLayerCount = imageView.arrayLayerCount, + .kind = imageView.kind, + .pow2pad = imageView.pow2pad, + }; +} + +ImageBufferKey ImageBufferKey::createFrom(const gnm::TBuffer &tbuffer) { + return { + .address = tbuffer.address(), + .type = tbuffer.type, + .dfmt = tbuffer.dfmt, + .tileMode = getDefaultTileModes()[tbuffer.tiling_idx], + .extent = + { + .width = tbuffer.width + 1u, + .height = tbuffer.height + 1u, + .depth = tbuffer.depth + 1u, + }, + .pitch = tbuffer.pitch + 1u, + .baseMipLevel = static_cast(tbuffer.base_level), + .mipCount = tbuffer.last_level - tbuffer.base_level + 1u, + .baseArrayLayer = static_cast(tbuffer.base_array), + .arrayLayerCount = tbuffer.last_array - tbuffer.base_array + 1u, + .pow2pad = tbuffer.pow2pad != 0, + }; +} + +ImageBufferKey ImageBufferKey::createFrom(const ImageKey &imageKey) { + return { + .address = imageKey.readAddress, + .type = imageKey.type, + .dfmt = imageKey.dfmt, + .tileMode = imageKey.tileMode, + .extent = imageKey.extent, + .pitch = imageKey.pitch, + .baseMipLevel = imageKey.baseMipLevel, + .mipCount = imageKey.mipCount, + .baseArrayLayer = imageKey.baseArrayLayer, + .arrayLayerCount = imageKey.arrayLayerCount, + .pow2pad = imageKey.pow2pad, }; } @@ -1164,9 +1334,16 @@ Cache::Buffer Cache::Tag::getBuffer(rx::AddressRange range, Access access) { auto it = table.queryArea(range.beginAddress()); if (it == table.end() || !it.range().contains(range)) { - mParent->flushImages(*this, range); - mScheduler->submit(); - mScheduler->wait(); + if (mParent->flushImages(*this, range)) { + mScheduler->submit(); + mScheduler->wait(); + } + + if (mParent->flushImageBuffers(*this, range)) { + mScheduler->submit(); + mScheduler->wait(); + } + mParent->flushBuffers(range); it = table.map(range.beginAddress(), range.endAddress(), nullptr, false, @@ -1186,9 +1363,9 @@ Cache::Buffer Cache::Tag::getBuffer(rx::AddressRange range, Access access) { it.get() = std::move(cached); } - auto cached = static_cast(it->get()); mStorage->mAcquiredMemoryResources.push_back(it.get()); + auto cached = static_cast(it->get()); cached->acquire(this, access); auto addressRange = it.get()->addressRange; @@ -1198,8 +1375,16 @@ Cache::Buffer Cache::Tag::getBuffer(rx::AddressRange range, Access access) { addressRange.beginAddress(), addressRange.size()) || !mParent->isInSync(addressRange, cached->tagId)) { - mParent->flushImages(*this, range); - getScheduler().wait(); + if (mParent->flushImages(*this, range)) { + getScheduler().submit(); + getScheduler().wait(); + } + + if (mParent->flushImageBuffers(*this, range)) { + getScheduler().submit(); + getScheduler().wait(); + } + mParent->trackUpdate(EntryType::HostVisibleBuffer, addressRange, it.get(), getReadId(), cached->expensive()); amdgpu::RemoteMemory memory{mParent->mVmId}; @@ -1445,8 +1630,120 @@ static bool isImageCompatible(CachedImage *cached, const ImageKey &key) { cached->image.getWidth() == key.extent.width && cached->image.getHeight() == key.extent.height && cached->image.getDepth() == key.extent.depth && - cached->pitch == key.pitch && - cached->tileMode.raw == key.tileMode.raw && cached->kind == key.kind; + cached->imageBufferKey.pitch == key.pitch && + cached->imageBufferKey.tileMode.raw == key.tileMode.raw && + cached->kind == key.kind; +} + +static bool isImageBufferCompatible(CachedImageBuffer *cached, + const ImageBufferKey &key) { + // FIXME: relax it + return cached->dfmt == key.dfmt && cached->width == key.extent.width && + cached->height == key.extent.height && + cached->depth == key.extent.depth && cached->pitch == key.pitch && + cached->tileMode.raw == key.tileMode.raw; +} + +Cache::ImageBuffer Cache::Tag::getImageBuffer(const ImageBufferKey &key, + Access access) { + auto surfaceInfo = computeSurfaceInfo( + key.tileMode, key.type, key.dfmt, key.extent.width, key.extent.height, + key.extent.depth, key.pitch, key.baseArrayLayer, key.arrayLayerCount, + key.baseMipLevel, key.mipCount, key.pow2pad); + + auto range = + rx::AddressRange::fromBeginSize(key.address, surfaceInfo.totalTiledSize); + + auto &table = mParent->getTable(EntryType::ImageBuffer); + + std::vector> flushed; + for (auto it = table.lowerBound(range.beginAddress()); it != table.end(); + ++it) { + if (!range.intersects(it.range())) { + break; + } + + auto imgBuffer = std::static_pointer_cast(it.get()); + + if (range == it.range()) { + if (isImageBufferCompatible(imgBuffer.get(), key)) { + break; + } + + if (imgBuffer->flush(*this, getScheduler(), imgBuffer->addressRange)) { + flushed.push_back(std::move(imgBuffer)); + } + + it.get() = nullptr; + break; + } + + if (imgBuffer->flush(*this, getScheduler(), imgBuffer->addressRange)) { + flushed.push_back(std::move(imgBuffer)); + } + } + + if (!flushed.empty()) { + getScheduler().submit(); + getScheduler().wait(); + flushed.clear(); + } + + auto it = + table.map(range.beginAddress(), range.endAddress(), nullptr, false, true); + + if (it.get() == nullptr) { + auto cached = std::make_shared(); + cached->buffer = vk::Buffer::Allocate( + vk::getDeviceLocalMemory(), surfaceInfo.totalLinearSize, + VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT); + cached->tiler = &getDevice()->tiler; + cached->info = surfaceInfo; + cached->addressRange = range; + cached->tileMode = key.tileMode; + cached->dfmt = key.dfmt; + cached->pitch = key.pitch; + cached->arrayLayers = key.arrayLayerCount; + cached->mipLevels = key.mipCount; + cached->width = key.extent.width; + cached->height = key.extent.height; + cached->depth = key.extent.depth; + + it.get() = std::move(cached); + } + + mStorage->mAcquiredImageBufferResources.push_back(it.get()); + + auto cached = std::static_pointer_cast(it.get()); + cached->acquire(this, access); + + if ((access & Access::Read) != Access::None) { + if (!cached->expensive() || + testHostInvalidations(getDevice(), mParent->mVmId, range.beginAddress(), + range.size()) || + !mParent->isInSync(cached->addressRange, cached->tagId)) { + + auto tiledBuffer = getBuffer(range, Access::Read); + if (tiledBuffer.tagId != cached->tagId) { + mParent->trackUpdate(EntryType::ImageBuffer, range, it.get(), + tiledBuffer.tagId, cached->expensive()); + + cached->update(this, cached->addressRange, tiledBuffer); + } + } + } + + std::uint64_t offset = + cached->addressRange.beginAddress() - range.beginAddress(); + + Cache::ImageBuffer result{ + .handle = cached->buffer.getHandle(), + .offset = offset, + .deviceAddress = cached->buffer.getAddress() + offset, + .tagId = cached->tagId, + }; + + return result; } Cache::Image Cache::Tag::getImage(const ImageKey &key, Access access) { @@ -1465,30 +1762,39 @@ Cache::Image Cache::Tag::getImage(const ImageKey &key, Access access) { auto &table = mParent->getTable(EntryType::Image); + std::vector> flushed; + for (auto it = table.lowerBound(storeRange.beginAddress()); it != table.end(); ++it) { if (!storeRange.intersects(it.range())) { break; } - auto img = static_cast(it->get()); + auto img = std::static_pointer_cast(it.get()); if (storeRange == it.range()) { - if (isImageCompatible(img, key)) { + if (isImageCompatible(img.get(), key)) { break; } - img->flush(*this, getScheduler(), img->addressRange); - getScheduler().wait(); + if (img->flush(*this, getScheduler(), img->addressRange)) { + flushed.push_back(std::move(img)); + } + it.get() = nullptr; break; } - img->flush(*this, getScheduler(), img->addressRange); + if (img->flush(*this, getScheduler(), img->addressRange)) { + flushed.push_back(std::move(img)); + } } - getScheduler().submit(); - getScheduler().wait(); + if (!flushed.empty()) { + getScheduler().submit(); + getScheduler().wait(); + flushed.clear(); + } auto it = table.map(storeRange.beginAddress(), storeRange.endAddress(), nullptr, false, true); @@ -1506,7 +1812,8 @@ Cache::Image Cache::Tag::getImage(const ImageKey &key, Access access) { key.dfmt == gnm::kDataFormatBc5 || key.dfmt == gnm::kDataFormatBc6 || key.dfmt == gnm::kDataFormatBc7 || key.dfmt == gnm::kDataFormatGB_GR || - key.dfmt == gnm::kDataFormatBG_RG; + key.dfmt == gnm::kDataFormatBG_RG || + key.dfmt == gnm::kDataFormat5_6_5; if (!isCompressed) { usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; @@ -1548,13 +1855,10 @@ Cache::Image Cache::Tag::getImage(const ImageKey &key, Access access) { auto cached = std::make_shared(); cached->image = std::move(image); - cached->tiler = &getDevice()->tiler; cached->info = surfaceInfo; cached->addressRange = storeRange; cached->kind = key.kind; - cached->tileMode = key.tileMode; - cached->dfmt = key.dfmt; - cached->pitch = key.pitch; + cached->imageBufferKey = ImageBufferKey::createFrom(key); transitionImageLayout(mScheduler->getCommandBuffer(), cached->image, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL, @@ -1562,6 +1866,8 @@ Cache::Image Cache::Tag::getImage(const ImageKey &key, Access access) { it.get() = std::move(cached); } + mStorage->mAcquiredImageResources.push_back(it.get()); + auto cached = std::static_pointer_cast(it.get()); cached->acquire(this, access); @@ -1571,19 +1877,20 @@ Cache::Image Cache::Tag::getImage(const ImageKey &key, Access access) { updateRange.beginAddress(), updateRange.size()) || !mParent->isInSync(cached->addressRange, cached->tagId)) { - auto tiledBuffer = getBuffer(updateRange, Access::Read); - if (tiledBuffer.tagId != cached->tagId) { + auto imageBufferKey = cached->imageBufferKey; + imageBufferKey.address = key.readAddress; + auto imageBuffer = getImageBuffer(imageBufferKey, Access::Read); + if (imageBuffer.tagId != cached->tagId) { mParent->trackUpdate(EntryType::Image, storeRange, it.get(), - tiledBuffer.tagId, cached->expensive()); + imageBuffer.tagId, cached->expensive()); - cached->update(this, cached->addressRange, tiledBuffer); + cached->update(this, cached->addressRange, imageBuffer); } } } auto entry = cached.get(); auto handle = cached->image.getHandle(); - mStorage->mAcquiredImageResources.push_back(std::move(cached)); return { .handle = handle, @@ -1593,7 +1900,8 @@ Cache::Image Cache::Tag::getImage(const ImageKey &key, Access access) { }; } -Cache::ImageView Cache::Tag::getImageView(const ImageKey &key, Access access) { +Cache::ImageView Cache::Tag::getImageView(const ImageViewKey &key, + Access access) { auto surfaceInfo = computeSurfaceInfo( key.tileMode, key.type, key.dfmt, key.extent.width, key.extent.height, key.extent.depth, key.pitch, key.baseArrayLayer, key.arrayLayerCount, @@ -1601,16 +1909,15 @@ Cache::ImageView Cache::Tag::getImageView(const ImageKey &key, Access access) { auto storeRange = rx::AddressRange::fromBeginSize(key.writeAddress, surfaceInfo.totalTiledSize); - auto updateRange = rx::AddressRange::fromBeginSize( - key.readAddress, surfaceInfo.totalTiledSize); - - if ((access & Access::Write) != Access::Write) { - storeRange = updateRange; - } - - auto image = getImage(key, access); + auto image = getImage(ImageKey::createFrom(key), access); auto result = vk::ImageView(gnm::toVkImageViewType(key.type), image.handle, - image.format, {}, + image.format, + { + .r = gnm::toVkComponentSwizzle(key.r), + .g = gnm::toVkComponentSwizzle(key.g), + .b = gnm::toVkComponentSwizzle(key.b), + .a = gnm::toVkComponentSwizzle(key.a), + }, { .aspectMask = toAspect(key.kind), .baseMipLevel = key.baseMipLevel, @@ -1686,30 +1993,47 @@ void Cache::Tag::release() { } std::vector> tmpResources; + bool hasSubmits = false; while (!mStorage->mAcquiredImageResources.empty()) { auto resource = std::move(mStorage->mAcquiredImageResources.back()); mStorage->mAcquiredImageResources.pop_back(); - resource->release(this); + if (resource->release(this)) { + hasSubmits = true; + } + + tmpResources.push_back(std::move(resource)); + } + + if (hasSubmits) { + hasSubmits = false; mScheduler->submit(); mScheduler->wait(); + } + + while (!mStorage->mAcquiredImageBufferResources.empty()) { + auto resource = std::move(mStorage->mAcquiredImageBufferResources.back()); + mStorage->mAcquiredImageBufferResources.pop_back(); + if (resource->release(this)) { + hasSubmits = true; + } + tmpResources.push_back(std::move(resource)); } + if (hasSubmits) { + hasSubmits = false; + mScheduler->submit(); + mScheduler->wait(); + } + while (!mStorage->mAcquiredMemoryResources.empty()) { auto resource = std::move(mStorage->mAcquiredMemoryResources.back()); mStorage->mAcquiredMemoryResources.pop_back(); resource->release(this); - mScheduler->submit(); - mScheduler->wait(); tmpResources.push_back(std::move(resource)); } - if (!tmpResources.empty()) { - mScheduler->submit(); - mScheduler->wait(); - } - mStorage->clear(); auto storageIndex = mStorage - mParent->mTagStorages; mStorage = nullptr; @@ -2186,7 +2510,8 @@ Cache::Cache(Device *device, int vmId) : mDevice(device), mVmId(vmId) { .maxSets = static_cast(std::size(mGraphicsDescriptorSets) * mGraphicsDescriptorSetLayouts.size() + - std::size(mComputeDescriptorSets)) * 2, + std::size(mComputeDescriptorSets)) * + 2, .poolSizeCount = static_cast(std::size(descriptorPoolSizes)), .pPoolSizes = descriptorPoolSizes, }; @@ -2260,9 +2585,16 @@ void Cache::invalidate(Tag &tag, rx::AddressRange range) { markHostInvalidated(mDevice, mVmId, range.beginAddress(), range.size()); } void Cache::flush(Tag &tag, rx::AddressRange range) { - flushImages(tag, range); - tag.getScheduler().submit(); - tag.getScheduler().wait(); + if (flushImages(tag, range)) { + tag.getScheduler().submit(); + tag.getScheduler().wait(); + } + + if (flushImageBuffers(tag, range)) { + tag.getScheduler().submit(); + tag.getScheduler().wait(); + } + flushBuffers(range); } @@ -2309,8 +2641,32 @@ rx::AddressRange Cache::flushImages(Tag &tag, rx::AddressRange range) { break; } - static_cast(cached)->flush(tag, tag.getScheduler(), range); - result = result.merge(cached->addressRange); + if (static_cast(cached)->flush(tag, tag.getScheduler(), + range)) { + result = result.merge(cached->addressRange); + } + ++beginIt; + } + + return result; +} + +rx::AddressRange Cache::flushImageBuffers(Tag &tag, rx::AddressRange range) { + auto &table = getTable(EntryType::ImageBuffer); + rx::AddressRange result; + auto beginIt = table.lowerBound(range.beginAddress()); + + while (beginIt != table.end()) { + auto cached = beginIt->get(); + if (!cached->addressRange.intersects(range)) { + break; + } + + if (static_cast(cached)->flush(tag, tag.getScheduler(), + range)) { + result = result.merge(cached->addressRange); + } + ++beginIt; } @@ -2330,10 +2686,11 @@ rx::AddressRange Cache::flushBuffers(rx::AddressRange range) { auto address = RemoteMemory{mVmId}.getPointer(cached->addressRange.beginAddress()); - static_cast(cached)->flush(address, - cached->addressRange); + if (static_cast(cached)->flush( + address, cached->addressRange)) { + result = result.merge(cached->addressRange); + } - result = result.merge(cached->addressRange); ++beginIt; } diff --git a/rpcsx/gpu/Cache.hpp b/rpcsx/gpu/Cache.hpp index 24fbb44cb..67257a8d0 100644 --- a/rpcsx/gpu/Cache.hpp +++ b/rpcsx/gpu/Cache.hpp @@ -28,6 +28,30 @@ struct ShaderKey { enum class ImageKind : std::uint8_t { Color, Depth, Stencil }; +struct ImageViewKey { + std::uint64_t readAddress; + std::uint64_t writeAddress; + gnm::TextureType type; + gnm::DataFormat dfmt; + gnm::NumericFormat nfmt; + TileMode tileMode = {}; + VkOffset3D offset = {0, 0, 0}; + VkExtent3D extent = {1, 1, 1}; + std::uint32_t pitch = 1; + unsigned baseMipLevel = 0; + unsigned mipCount = 1; + unsigned baseArrayLayer = 0; + unsigned arrayLayerCount = 1; + ImageKind kind = ImageKind::Color; + bool pow2pad = false; + gnm::Swizzle r = gnm::Swizzle::R; + gnm::Swizzle g = gnm::Swizzle::G; + gnm::Swizzle b = gnm::Swizzle::B; + gnm::Swizzle a = gnm::Swizzle::A; + + static ImageViewKey createFrom(const gnm::TBuffer &tbuffer); +}; + struct ImageKey { std::uint64_t readAddress; std::uint64_t writeAddress; @@ -45,6 +69,24 @@ struct ImageKey { bool pow2pad = false; static ImageKey createFrom(const gnm::TBuffer &tbuffer); + static ImageKey createFrom(const ImageViewKey &imageView); +}; + +struct ImageBufferKey { + std::uint64_t address; + gnm::TextureType type; + gnm::DataFormat dfmt; + TileMode tileMode = {}; + VkExtent3D extent = {1, 1, 1}; + std::uint32_t pitch = 1; + unsigned baseMipLevel = 0; + unsigned mipCount = 1; + unsigned baseArrayLayer = 0; + unsigned arrayLayerCount = 1; + bool pow2pad = false; + + static ImageBufferKey createFrom(const gnm::TBuffer &tbuffer); + static ImageBufferKey createFrom(const ImageKey &imageKey); }; struct SamplerKey { @@ -74,6 +116,7 @@ struct Cache { HostVisibleBuffer, DeviceLocalBuffer, IndexBuffer, + ImageBuffer, Image, Shader, @@ -144,6 +187,13 @@ struct Cache { std::byte *data; }; + struct ImageBuffer { + VkBuffer handle = VK_NULL_HANDLE; + std::uint64_t offset; + std::uint64_t deviceAddress; + TagId tagId; + }; + struct IndexBuffer { VkBuffer handle = VK_NULL_HANDLE; std::uint64_t offset; @@ -240,6 +290,7 @@ private: }; std::vector> mAcquiredImageResources; + std::vector> mAcquiredImageBufferResources; std::vector> mAcquiredMemoryResources; std::vector> mAcquiredViewResources; std::vector memoryTableConfigSlots; @@ -251,6 +302,7 @@ private: void clear() { mAcquiredImageResources.clear(); + mAcquiredImageBufferResources.clear(); mAcquiredMemoryResources.clear(); memoryTableConfigSlots.clear(); descriptorBuffers.clear(); @@ -306,8 +358,9 @@ public: std::uint32_t indexCount, gnm::PrimitiveType primType, gnm::IndexType indexType); + ImageBuffer getImageBuffer(const ImageBufferKey &key, Access access); Image getImage(const ImageKey &key, Access access); - ImageView getImageView(const ImageKey &key, Access access); + ImageView getImageView(const ImageViewKey &key, Access access); void readMemory(void *target, rx::AddressRange range); void writeMemory(const void *source, rx::AddressRange range); int compareMemory(const void *source, rx::AddressRange range); @@ -503,6 +556,7 @@ public: auto &getTable(EntryType type) { return mTables[static_cast(type)]; } rx::AddressRange flushImages(Tag &tag, rx::AddressRange range); + rx::AddressRange flushImageBuffers(Tag &tag, rx::AddressRange range); rx::AddressRange flushBuffers(rx::AddressRange range); private: diff --git a/rpcsx/gpu/Device.cpp b/rpcsx/gpu/Device.cpp index f2b84b644..3b629042d 100644 --- a/rpcsx/gpu/Device.cpp +++ b/rpcsx/gpu/Device.cpp @@ -224,9 +224,15 @@ Device::Device() : vkContext(createVkContext(this)) { rx::AddressRange::fromBeginSize(address, rx::mem::pageSize); auto tag = getCacheTag(vmId, sched); - tag.getCache()->flushImages(tag, range); - sched.submit(); - sched.wait(); + if (tag.getCache()->flushImages(tag, range)) { + sched.submit(); + sched.wait(); + } + + if (tag.getCache()->flushImageBuffers(tag, range)) { + sched.submit(); + sched.wait(); + } auto flushedRange = tag.getCache()->flushBuffers(range); diff --git a/rpcsx/gpu/FlipPipeline.cpp b/rpcsx/gpu/FlipPipeline.cpp index 917c95580..8b1ea229c 100644 --- a/rpcsx/gpu/FlipPipeline.cpp +++ b/rpcsx/gpu/FlipPipeline.cpp @@ -179,16 +179,22 @@ FlipPipeline::FlipPipeline() { .pColorAttachmentFormats = &colorFormat, }; + VkPipelineViewportStateCreateInfo viewportState{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, + .viewportCount = 1, + .scissorCount = 1, + }; + VkGraphicsPipelineCreateInfo pipelineCreateInfos[]{ { .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, .pNext = &info, - .flags = VK_PIPELINE_CREATE_DESCRIPTOR_BUFFER_BIT_EXT, .stageCount = std::size(stagesStd), .pStages = stagesStd, .pVertexInputState = &vertexInputState, .pInputAssemblyState = &inputAssemblyState, .pTessellationState = &tessellationState, + .pViewportState = &viewportState, .pRasterizationState = &rasterizationState, .pMultisampleState = &multisampleState, .pDepthStencilState = &depthStencilState, @@ -199,12 +205,12 @@ FlipPipeline::FlipPipeline() { { .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, .pNext = &info, - .flags = VK_PIPELINE_CREATE_DESCRIPTOR_BUFFER_BIT_EXT, .stageCount = std::size(stagesAlt), .pStages = stagesAlt, .pVertexInputState = &vertexInputState, .pInputAssemblyState = &inputAssemblyState, .pTessellationState = &tessellationState, + .pViewportState = &viewportState, .pRasterizationState = &rasterizationState, .pMultisampleState = &multisampleState, .pDepthStencilState = &depthStencilState, diff --git a/rpcsx/gpu/Renderer.cpp b/rpcsx/gpu/Renderer.cpp index 77e77481b..9d80fcee9 100644 --- a/rpcsx/gpu/Renderer.cpp +++ b/rpcsx/gpu/Renderer.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -158,9 +159,10 @@ void amdgpu::draw(GraphicsPipe &pipe, int vmId, std::uint32_t firstVertex, return; } - if (pipe.context.cbColorControl.mode != gnm::CbMode::Normal) { + if (pipe.context.cbColorControl.mode != gnm::CbMode::Normal && + pipe.context.cbColorControl.mode != gnm::CbMode::EliminateFastClear) { std::println("unimplemented context.cbColorControl.mode = {}", - static_cast(pipe.context.cbColorControl.mode)); + static_cast(pipe.context.cbColorControl.mode)); return; } @@ -242,14 +244,16 @@ void amdgpu::draw(GraphicsPipe &pipe, int vmId, std::uint32_t firstVertex, auto vkViewPortScissor = gnm::toVkRect2D(viewPortScissor); viewPortScissors[renderTargets] = vkViewPortScissor; - ImageKey renderTargetInfo{}; + ImageViewKey renderTargetInfo{}; renderTargetInfo.type = gnm::TextureType::Dim2D; renderTargetInfo.pitch = vkViewPortScissor.extent.width; renderTargetInfo.readAddress = static_cast(cbColor.base) << 8; renderTargetInfo.writeAddress = renderTargetInfo.readAddress; - renderTargetInfo.extent.width = vkViewPortScissor.extent.width; - renderTargetInfo.extent.height = vkViewPortScissor.extent.height; + renderTargetInfo.extent.width = + vkViewPortScissor.offset.x + vkViewPortScissor.extent.width; + renderTargetInfo.extent.height = + vkViewPortScissor.offset.y + vkViewPortScissor.extent.height; renderTargetInfo.extent.depth = 1; renderTargetInfo.dfmt = cbColor.info.dfmt; renderTargetInfo.nfmt = @@ -273,7 +277,8 @@ void amdgpu::draw(GraphicsPipe &pipe, int vmId, std::uint32_t firstVertex, if (pipe.uConfig.vgtPrimitiveType == gnm::PrimitiveType::None) { if (cbColor.info.fastClear) { - auto image = cacheTag.getImage(renderTargetInfo, access); + auto image = + cacheTag.getImage(ImageKey::createFrom(renderTargetInfo), access); VkClearColorValue clearValue = { .uint32 = { @@ -618,7 +623,7 @@ void amdgpu::flip(Cache::Tag &cacheTag, VkExtent2D targetExtent, std::uint64_t address, VkImageView target, VkExtent2D imageExtent, FlipType type, TileMode tileMode, gnm::DataFormat dfmt, gnm::NumericFormat nfmt) { - ImageKey framebuffer{}; + ImageViewKey framebuffer{}; framebuffer.readAddress = address; framebuffer.type = gnm::TextureType::Dim2D; framebuffer.dfmt = dfmt; @@ -687,8 +692,8 @@ void amdgpu::flip(Cache::Tag &cacheTag, VkExtent2D targetExtent, cacheTag.getDevice()->flipPipeline.bind(cacheTag.getScheduler(), type, imageView.handle, sampler.handle); - vkCmdSetViewportWithCount(commandBuffer, 1, &viewPort); - vkCmdSetScissorWithCount(commandBuffer, 1, viewPortScissors); + vkCmdSetViewport(commandBuffer, 0, 1, &viewPort); + vkCmdSetScissor(commandBuffer, 0, 1, viewPortScissors); vkCmdDraw(commandBuffer, 6, 1, 0, 0); vkCmdEndRendering(commandBuffer); diff --git a/rpcsx/gpu/lib/gcn-shader/shaders/rdna.glsl b/rpcsx/gpu/lib/gcn-shader/shaders/rdna.glsl index 0357208d4..d30f69108 100644 --- a/rpcsx/gpu/lib/gcn-shader/shaders/rdna.glsl +++ b/rpcsx/gpu/lib/gcn-shader/shaders/rdna.glsl @@ -63,6 +63,23 @@ DEFINE_SIZEOF(float64_t, 8); uint thread_id; uint64_t exec; +float32_t swizzle(f32vec4 comp, int sel) { + switch (sel) { + case 0: return 0; + case 1: return 1; + case 4: return comp.x; + case 5: return comp.y; + case 6: return comp.z; + case 7: return comp.w; + } + + return 1; +} + +f32vec4 swizzle(f32vec4 comp, int selX, int selY, int selZ, int selW) { + return f32vec4(swizzle(comp, selX), swizzle(comp, selY), swizzle(comp, selZ), swizzle(comp, selW)); +} + int32_t sext(int32_t x, uint bits) { return bits == 32 ? x : (x << (32 - bits)) >> (32 - bits); } @@ -2164,12 +2181,12 @@ uint32_t[16] s_load_dwordx16(int32_t memoryLocationHint, uint64_t sbase, int32_t } #define S_BUFFER_LOAD_DWORD(dest, memoryLocationHint, vbuffer, offset, N) \ - uint64_t base_address = vbuffer_base(vbuffer) & ~0x3; \ + uint64_t base_address = vbuffer_base(vbuffer) & ~0x3ul; \ uint64_t stride = vbuffer_stride(vbuffer); \ uint64_t num_records = vbuffer_num_records(vbuffer); \ uint64_t size = (stride == 0 ? 1 : stride) * num_records; \ uint64_t deviceAreaSize = 0; \ - uint64_t deviceAddress = findMemoryAddress(base_address + offset, size, memoryLocationHint, deviceAreaSize); \ + uint64_t deviceAddress = findMemoryAddress(base_address + (offset & ~0x3ul), size, memoryLocationHint, deviceAreaSize); \ int32_t _offset = 0; \ for (int i = 0; i < N; i++) { \ if (deviceAddress == kInvalidAddress || _offset + SIZEOF(uint32_t) > deviceAreaSize) { \ @@ -2563,6 +2580,55 @@ uint8_t ssampler_border_color_type(u32vec4 ssampler) { return uint8_t(U32ARRAY_FETCH_BITS(ssampler, 126, 2)); } +uint64_t image_memory_table; + +uint64_t findImageMemoryAddress(uint64_t address, uint64_t size, int32_t hint, out uint64_t areaSize) { + MemoryTable mt = MemoryTable(image_memory_table); + + uint32_t pivot; + uint32_t slotCount = mt.count; + if (hint < 0 || hint >= slotCount) { + pivot = slotCount / 2; + } else { + pivot = uint32_t(hint); + } + + uint32_t begin = 0; + uint32_t end = slotCount; + + while (begin < end) { + MemoryTableSlot slot = mt.slots[pivot]; + uint64_t slotSize = getSlotSize(slot); + if (slot.address >= address + size) { + end = pivot; + } else if (address >= slot.address + slotSize) { + begin = pivot + 1; + } else { + uint64_t offset = address - slot.address; + areaSize = slotSize - offset; + return slot.deviceAddress + offset; + } + + pivot = begin + ((end - begin) / 2); + } + + return kInvalidAddress; +} + + +int findSamplerIndex(int32_t samplerIndexHint, u32vec4 ssampler) { + return samplerIndexHint; +} +int findTexture1DIndex(int32_t textureIndexHint, uint32_t tbuffer[8]) { + return textureIndexHint; +} +int findTexture2DIndex(int32_t textureIndexHint, uint32_t tbuffer[8]) { + return textureIndexHint; +} +int findTexture3DIndex(int32_t textureIndexHint, uint32_t tbuffer[8]) { + return textureIndexHint; +} + // void image_gather4(inout u32vec4 vdata, u32vec4 vaddr, int32_t textureIndexHint, uint32_t tbuffer[8], int32_t samplerIndexHint, u32vec4 samplerDescriptor) {} // image_gather4_cl // image_gather4_l @@ -2588,37 +2654,6 @@ uint8_t ssampler_border_color_type(u32vec4 ssampler) { // image_gather4_c_b_cl_o // image_gather4_c_lz_o -int findSamplerIndex(int32_t samplerIndexHint, u32vec4 ssampler) { - return samplerIndexHint; -} -int findTexture1DIndex(int32_t textureIndexHint, uint32_t tbuffer[8]) { - return textureIndexHint; -} -int findTexture2DIndex(int32_t textureIndexHint, uint32_t tbuffer[8]) { - return textureIndexHint; -} -int findTexture3DIndex(int32_t textureIndexHint, uint32_t tbuffer[8]) { - return textureIndexHint; -} - -float32_t swizzle(f32vec4 comp, int sel) { - switch (sel) { - case 0: return 0; - case 1: return 1; - case 4: return comp.x; - case 5: return comp.y; - case 6: return comp.z; - case 7: return comp.w; - } - - return 1; -} - -f32vec4 swizzle(f32vec4 comp, int selX, int selY, int selZ, int selW) { - return f32vec4(swizzle(comp, selX), swizzle(comp, selY), swizzle(comp, selZ), swizzle(comp, selW)); -} - - // void image_atomic_add() { // // imageAtomicAdd // } @@ -2676,13 +2711,6 @@ void image_load(inout f32vec4 vdata, i32vec3 vaddr, int32_t textureIndexHint, ui return; } - result = swizzle(result, - tbuffer_dst_sel_x(tbuffer), - tbuffer_dst_sel_y(tbuffer), - tbuffer_dst_sel_z(tbuffer), - tbuffer_dst_sel_w(tbuffer)); - - int vdataIndex = 0; for (int i = 0; i < 4; ++i) { if ((dmask & (1 << i)) != 0) { @@ -2721,13 +2749,6 @@ void image_load_mip(inout f32vec4 vdata, u32vec4 vaddr_u, int32_t textureIndexHi return; } - result = swizzle(result, - tbuffer_dst_sel_x(tbuffer), - tbuffer_dst_sel_y(tbuffer), - tbuffer_dst_sel_z(tbuffer), - tbuffer_dst_sel_w(tbuffer)); - - int vdataIndex = 0; for (int i = 0; i < 4; ++i) { if ((dmask & (1 << i)) != 0) { @@ -2782,13 +2803,6 @@ void image_sample(inout f32vec4 vdata, f32vec3 vaddr, int32_t textureIndexHint, // debugPrintfEXT("image_sample: textureType: %u, coord: %v3f, result: %v4f, dmask: %u", textureType, vaddr, result, dmask); - result = swizzle(result, - tbuffer_dst_sel_x(tbuffer), - tbuffer_dst_sel_y(tbuffer), - tbuffer_dst_sel_z(tbuffer), - tbuffer_dst_sel_w(tbuffer)); - - int vdataIndex = 0; for (int i = 0; i < 4; ++i) { if ((dmask & (1 << i)) != 0) { @@ -2837,13 +2851,6 @@ void image_sample_l(inout f32vec4 vdata, f32vec4 vaddr, int32_t textureIndexHint return; } - result = swizzle(result, - tbuffer_dst_sel_x(tbuffer), - tbuffer_dst_sel_y(tbuffer), - tbuffer_dst_sel_z(tbuffer), - tbuffer_dst_sel_w(tbuffer)); - - int vdataIndex = 0; for (int i = 0; i < 4; ++i) { if ((dmask & (1 << i)) != 0) { @@ -2854,7 +2861,50 @@ void image_sample_l(inout f32vec4 vdata, f32vec4 vaddr, int32_t textureIndexHint // image_sample_b // image_sample_b_cl -// image_sample_lz +void image_sample_lz(inout f32vec4 vdata, f32vec3 vaddr, int32_t textureIndexHint, uint32_t tbuffer[8], int32_t samplerIndexHint, u32vec4 ssampler, uint32_t dmask) { + uint8_t textureType = tbuffer_type(tbuffer); + f32vec4 result; + switch (uint(textureType)) { + case kTextureType1D: + case kTextureTypeArray1D: + result = textureLod( + sampler1D( + textures1D[findTexture1DIndex(textureIndexHint, tbuffer)], + samplers[findSamplerIndex(samplerIndexHint, ssampler)] + ), vaddr.x, 0); + break; + + case kTextureType2D: + case kTextureTypeCube: + case kTextureTypeArray2D: + case kTextureTypeMsaa2D: + case kTextureTypeMsaaArray2D: + result = textureLod( + sampler2D( + textures2D[findTexture2DIndex(textureIndexHint, tbuffer)], + samplers[findSamplerIndex(samplerIndexHint, ssampler)] + ), vaddr.xy, 0); + break; + + case kTextureType3D: + result = textureLod( + sampler3D( + textures3D[findTexture3DIndex(textureIndexHint, tbuffer)], + samplers[findSamplerIndex(samplerIndexHint, ssampler)] + ), vaddr.xyz, 0); + break; + + default: + return; + } + + int vdataIndex = 0; + for (int i = 0; i < 4; ++i) { + if ((dmask & (1 << i)) != 0) { + vdata[vdataIndex++] = result[i]; + } + } +} // image_sample_c // image_sample_c_cl // image_sample_c_d