gpu2: fix flipper

add static pipeline for the flip engine
optimize the linear tiler path
fix out-of-bounds access in the tiler
implement swizzling for sampled images
DH 2024-09-28 18:07:24 +03:00
parent 4185b1aa40
commit 4e83c9e121
16 changed files with 637 additions and 410 deletions
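In rough terms, the tiler change adds a fast path for linearly tiled surfaces. A minimal sketch of the idea follows (guestBuffer, stagingBuffer, and the surrounding bookkeeping are placeholder names for the Cache.cpp code shown below; per-mip offset arithmetic is omitted):

bool isLinear = tileMode.arrayMode() == kArrayModeLinearGeneral ||
                tileMode.arrayMode() == kArrayModeLinearAligned;
if (isLinear) {
  // Linear layout in guest memory: copy each mip directly from the
  // guest-backed buffer into the VkImage, no tiler pass needed.
  vkCmdCopyBufferToImage(cmd, guestBuffer, image,
                         VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
                         regions.size(), regions.data());
} else {
  // Tiled layout: detile every mip into a linear staging buffer on the
  // GPU, then upload the staging buffer into the image.
  for (unsigned mip = 0; mip < mipCount; ++mip)
    tiler.detile(scheduler, surfaceInfo, tileMode, dfmt,
                 guestAddress, stagingAddress, mip, 0, arrayLayers);
  vkCmdCopyBufferToImage(cmd, stagingBuffer, image,
                         VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
                         regions.size(), regions.data());
}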

View file

@ -2,7 +2,8 @@ find_package(glfw3 3.3 REQUIRED)
add_precompiled_vulkan_spirv(rpcsx-gpu-shaders
shaders/fill_red.frag.glsl
shaders/flip.frag.glsl
shaders/flip_std.frag.glsl
shaders/flip_alt.frag.glsl
shaders/flip.vert.glsl
shaders/rect_list.geom.glsl
)
@ -11,6 +12,7 @@ add_executable(rpcsx-gpu2
Cache.cpp
main.cpp
Device.cpp
FlipPipeline.cpp
Pipe.cpp
Registers.cpp
Renderer.cpp

View file

@ -103,8 +103,7 @@ static VkShaderStageFlagBits shaderStageToVk(shader::gcn::Stage stage) {
}
static void fillStageBindings(VkDescriptorSetLayoutBinding *bindings,
VkShaderStageFlagBits stage, int setIndex,
std::uint32_t setCount) {
VkShaderStageFlagBits stage, int setIndex) {
auto createDescriptorBinding = [&](VkDescriptorType type, uint32_t count,
int dim = 0) {
@ -113,7 +112,7 @@ static void fillStageBindings(VkDescriptorSetLayoutBinding *bindings,
bindings[binding] = VkDescriptorSetLayoutBinding{
.binding = static_cast<std::uint32_t>(binding),
.descriptorType = type,
.descriptorCount = count * setCount,
.descriptorCount = count,
.stageFlags = VkShaderStageFlags(
stage | (binding > 0 && stage != VK_SHADER_STAGE_COMPUTE_BIT
? VK_SHADER_STAGE_ALL_GRAPHICS
@ -268,51 +267,102 @@ struct CachedImage : Cache::Entry {
.layerCount = image.getArrayLayers(),
};
auto transferBuffer = vk::Buffer::Allocate(
vk::getDeviceLocalMemory(), info.totalSize,
VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT);
auto tiledBuffer =
tag.getBuffer(baseAddress, info.totalSize, Access::Write);
auto &tiler = tag.getDevice()->tiler;
transitionImageLayout(
scheduler.getCommandBuffer(), image, VK_IMAGE_LAYOUT_GENERAL,
VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, subresourceRange);
for (unsigned mipLevel = 0; mipLevel < image.getMipLevels(); ++mipLevel) {
VkBufferImageCopy region = {
.bufferRowLength =
mipLevel > 0 ? 0 : std::max(info.pitch >> mipLevel, 1u),
.imageSubresource =
{
.aspectMask = toAspect(kind),
.mipLevel = mipLevel,
.baseArrayLayer = 0,
.layerCount = image.getArrayLayers(),
},
.imageExtent =
{
.width = std::max(image.getWidth() >> mipLevel, 1u),
.height = std::max(image.getHeight() >> mipLevel, 1u),
.depth = std::max(image.getDepth() >> mipLevel, 1u),
},
};
bool isLinear = acquiredTileMode.arrayMode() == kArrayModeLinearGeneral ||
acquiredTileMode.arrayMode() == kArrayModeLinearAligned;
std::vector<VkBufferImageCopy> regions;
regions.reserve(image.getMipLevels());
auto tiledBuffer =
tag.getBuffer(baseAddress, info.totalSize, Access::Write);
if (isLinear) {
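// Linear surfaces: copy the image straight into the guest-backed buffer, one region per mip level.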
for (unsigned mipLevel = 0; mipLevel < image.getMipLevels(); ++mipLevel) {
auto &regionInfo = info.getSubresourceInfo(mipLevel);
regions.push_back({
.bufferOffset = regionInfo.offset,
.bufferRowLength =
mipLevel > 0 ? 0 : std::max(info.pitch >> mipLevel, 1u),
.imageSubresource =
{
.aspectMask = toAspect(kind),
.mipLevel = mipLevel,
.baseArrayLayer = 0,
.layerCount = image.getArrayLayers(),
},
.imageExtent =
{
.width = std::max(image.getWidth() >> mipLevel, 1u),
.height = std::max(image.getHeight() >> mipLevel, 1u),
.depth = std::max(image.getDepth() >> mipLevel, 1u),
},
});
}
vkCmdCopyImageToBuffer(scheduler.getCommandBuffer(), image.getHandle(),
VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
transferBuffer.getHandle(), 1, &region);
tiledBuffer.handle, regions.size(),
regions.data());
} else {
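// Tiled surfaces: read the image back into a linear staging buffer, then re-tile it into guest memory on the GPU.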
std::uint64_t linearOffset = 0;
for (unsigned mipLevel = 0; mipLevel < image.getMipLevels(); ++mipLevel) {
auto &regionInfo = info.getSubresourceInfo(mipLevel);
regions.push_back({
.bufferOffset = linearOffset,
.bufferRowLength =
mipLevel > 0 ? 0 : std::max(info.pitch >> mipLevel, 1u),
.imageSubresource =
{
.aspectMask = toAspect(kind),
.mipLevel = mipLevel,
.baseArrayLayer = 0,
.layerCount = image.getArrayLayers(),
},
.imageExtent =
{
.width = std::max(image.getWidth() >> mipLevel, 1u),
.height = std::max(image.getHeight() >> mipLevel, 1u),
.depth = std::max(image.getDepth() >> mipLevel, 1u),
},
});
tiler.tile(scheduler, info, acquiredTileMode, acquiredDfmt,
transferBuffer.getAddress(), tiledBuffer.deviceAddress,
mipLevel, 0, image.getArrayLayers());
linearOffset += regionInfo.linearSize * image.getArrayLayers();
}
auto transferBuffer = vk::Buffer::Allocate(
vk::getDeviceLocalMemory(), linearOffset,
VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT);
vkCmdCopyImageToBuffer(scheduler.getCommandBuffer(), image.getHandle(),
VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
transferBuffer.getHandle(), regions.size(),
regions.data());
auto &tiler = tag.getDevice()->tiler;
linearOffset = 0;
for (unsigned mipLevel = 0; mipLevel < image.getMipLevels(); ++mipLevel) {
auto &regionInfo = info.getSubresourceInfo(mipLevel);
tiler.tile(scheduler, info, acquiredTileMode, acquiredDfmt,
transferBuffer.getAddress() + linearOffset,
tiledBuffer.deviceAddress, mipLevel, 0,
image.getArrayLayers());
linearOffset += regionInfo.linearSize * image.getArrayLayers();
}
scheduler.afterSubmit([transferBuffer = std::move(transferBuffer)] {});
}
transitionImageLayout(scheduler.getCommandBuffer(), image,
VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
VK_IMAGE_LAYOUT_GENERAL, subresourceRange);
// scheduler.afterSubmit([transferBuffer = std::move(transferBuffer)] {});
scheduler.submit();
scheduler.wait();
}
};
@ -729,97 +779,133 @@ Cache::Image Cache::Tag::getImage(const ImageKey &key, Access access) {
key.mipCount, key.arrayLayerCount, gnm::toVkFormat(key.dfmt, key.nfmt),
VK_SAMPLE_COUNT_1_BIT, usage);
VkImageSubresourceRange subresourceRange{
.aspectMask = toAspect(key.kind),
.baseMipLevel = key.baseMipLevel,
.levelCount = key.mipCount,
.baseArrayLayer = key.baseArrayLayer,
.layerCount = key.arrayLayerCount,
};
if ((access & Access::Read) != Access::None) {
auto tiledBuffer =
getBuffer(key.readAddress, surfaceInfo.totalSize, Access::Read);
auto &tiler = mParent->mDevice->tiler;
auto detiledBuffer =
vk::Buffer::Allocate(vk::getDeviceLocalMemory(), surfaceInfo.totalSize,
VK_BUFFER_USAGE_2_TRANSFER_DST_BIT_KHR |
VK_BUFFER_USAGE_2_TRANSFER_SRC_BIT_KHR);
VkImageSubresourceRange subresourceRange{
.aspectMask = toAspect(key.kind),
.baseMipLevel = key.baseMipLevel,
.levelCount = key.mipCount,
.baseArrayLayer = key.baseArrayLayer,
.layerCount = key.arrayLayerCount,
};
bool isLinear = key.tileMode.arrayMode() == kArrayModeLinearGeneral ||
key.tileMode.arrayMode() == kArrayModeLinearAligned;
std::vector<VkBufferImageCopy> regions;
regions.reserve(key.mipCount);
std::vector<VkBufferCopy> bufferRegions;
std::uint64_t dstAddress = 0;
std::uint64_t srcAddress = 0;
VkBuffer sourceBuffer;
auto tiledBuffer =
getBuffer(key.readAddress, surfaceInfo.totalSize, Access::Read);
if (isLinear) {
regions.reserve(key.mipCount);
} else {
dstAddress = detiledBuffer.getAddress();
srcAddress = tiledBuffer.deviceAddress;
}
for (unsigned mipLevel = key.baseMipLevel;
mipLevel < key.baseMipLevel + key.mipCount; ++mipLevel) {
auto &info = surfaceInfo.getSubresourceInfo(mipLevel);
if (isLinear) {
bufferRegions.push_back({
.srcOffset = info.offset,
.dstOffset = dstAddress,
.size = info.linearSize * key.arrayLayerCount,
sourceBuffer = tiledBuffer.handle;
for (unsigned mipLevel = key.baseMipLevel;
mipLevel < key.baseMipLevel + key.mipCount; ++mipLevel) {
auto &info = surfaceInfo.getSubresourceInfo(mipLevel);
regions.push_back({
.bufferOffset = info.offset,
.bufferRowLength =
mipLevel > 0 ? 0 : std::max(key.pitch >> mipLevel, 1u),
.imageSubresource =
{
.aspectMask = toAspect(key.kind),
.mipLevel = mipLevel,
.baseArrayLayer = key.baseArrayLayer,
.layerCount = key.arrayLayerCount,
},
.imageExtent =
{
.width = std::max(key.extent.width >> mipLevel, 1u),
.height = std::max(key.extent.height >> mipLevel, 1u),
.depth = std::max(key.extent.depth >> mipLevel, 1u),
},
});
} else {
tiler.detile(*mScheduler, surfaceInfo, key.tileMode, key.dfmt,
srcAddress, dstAddress, mipLevel, 0, key.arrayLayerCount);
regions.push_back({
.bufferOffset = info.offset,
.bufferRowLength =
mipLevel > 0 ? 0 : std::max(key.pitch >> mipLevel, 1u),
.imageSubresource =
{
.aspectMask = toAspect(key.kind),
.mipLevel = mipLevel,
.baseArrayLayer = key.baseArrayLayer,
.layerCount = key.arrayLayerCount,
},
.imageExtent =
{
.width = std::max(key.extent.width >> mipLevel, 1u),
.height = std::max(key.extent.height >> mipLevel, 1u),
.depth = std::max(key.extent.depth >> mipLevel, 1u),
},
});
}
} else {
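// Tiled surfaces: detile each mip level into a linear staging buffer before uploading it to the image.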
auto &tiler = mParent->mDevice->tiler;
std::uint64_t linearOffset = 0;
for (unsigned mipLevel = key.baseMipLevel;
mipLevel < key.baseMipLevel + key.mipCount; ++mipLevel) {
auto &info = surfaceInfo.getSubresourceInfo(mipLevel);
regions.push_back({
.bufferOffset = linearOffset,
.bufferRowLength =
mipLevel > 0 ? 0 : std::max(key.pitch >> mipLevel, 1u),
.imageSubresource =
{
.aspectMask = toAspect(key.kind),
.mipLevel = mipLevel,
.baseArrayLayer = key.baseArrayLayer,
.layerCount = key.arrayLayerCount,
},
.imageExtent =
{
.width = std::max(key.extent.width >> mipLevel, 1u),
.height = std::max(key.extent.height >> mipLevel, 1u),
.depth = std::max(key.extent.depth >> mipLevel, 1u),
},
});
linearOffset += info.linearSize * key.arrayLayerCount;
}
regions.push_back({
.bufferOffset = info.offset,
.bufferRowLength =
mipLevel > 0 ? 0 : std::max(key.pitch >> mipLevel, 1u),
.imageSubresource =
{
.aspectMask = toAspect(key.kind),
.mipLevel = mipLevel,
.baseArrayLayer = key.baseArrayLayer,
.layerCount = key.arrayLayerCount,
},
.imageExtent =
{
.width = std::max(key.extent.width >> mipLevel, 1u),
.height = std::max(key.extent.height >> mipLevel, 1u),
.depth = std::max(key.extent.depth >> mipLevel, 1u),
},
});
auto detiledBuffer =
vk::Buffer::Allocate(vk::getDeviceLocalMemory(), linearOffset,
VK_BUFFER_USAGE_2_TRANSFER_DST_BIT_KHR |
VK_BUFFER_USAGE_2_TRANSFER_SRC_BIT_KHR);
dstAddress += info.linearSize * key.arrayLayerCount;
srcAddress += info.tiledSize * key.arrayLayerCount;
}
sourceBuffer = detiledBuffer.getHandle();
std::uint64_t dstAddress = detiledBuffer.getAddress();
if (!bufferRegions.empty()) {
vkCmdCopyBuffer(mScheduler->getCommandBuffer(), tiledBuffer.handle,
detiledBuffer.getHandle(), bufferRegions.size(),
bufferRegions.data());
mScheduler->afterSubmit([detiledBuffer = std::move(detiledBuffer)] {});
for (unsigned mipLevel = key.baseMipLevel;
mipLevel < key.baseMipLevel + key.mipCount; ++mipLevel) {
auto &info = surfaceInfo.getSubresourceInfo(mipLevel);
tiler.detile(*mScheduler, surfaceInfo, key.tileMode, key.dfmt,
tiledBuffer.deviceAddress, dstAddress, mipLevel, 0,
key.arrayLayerCount);
dstAddress += info.linearSize * key.arrayLayerCount;
}
}
transitionImageLayout(
mScheduler->getCommandBuffer(), image, VK_IMAGE_LAYOUT_UNDEFINED,
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, subresourceRange);
vkCmdCopyBufferToImage(mScheduler->getCommandBuffer(),
detiledBuffer.getHandle(), image.getHandle(),
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, regions.size(),
regions.data());
vkCmdCopyBufferToImage(
mScheduler->getCommandBuffer(), sourceBuffer, image.getHandle(),
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, regions.size(), regions.data());
transitionImageLayout(mScheduler->getCommandBuffer(), image,
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
VK_IMAGE_LAYOUT_GENERAL, subresourceRange);
mScheduler->afterSubmit([detiledBuffer = std::move(detiledBuffer)] {});
}
auto cached = std::make_shared<CachedImage>();
@ -834,7 +920,7 @@ Cache::Image Cache::Tag::getImage(const ImageKey &key, Access access) {
cached->acquiredDfmt = key.dfmt;
mAcquiredResources.push_back(cached);
return {.handle = cached->image.getHandle()};
return {.handle = cached->image.getHandle(), .subresource = subresourceRange};
}
Cache::ImageView Cache::Tag::getImageView(const ImageViewKey &key,
@ -867,6 +953,7 @@ Cache::ImageView Cache::Tag::getImageView(const ImageViewKey &key,
return {
.handle = cached->view.getHandle(),
.imageHandle = image.handle,
.subresource = image.subresource,
};
}
@ -933,7 +1020,7 @@ Cache::Cache(Device *device, int vmId) : mDevice(device), mVmIm(vmId) {
[kDescriptorBindings.size()];
for (std::size_t index = 0; auto stage : kGraphicsStages) {
fillStageBindings(bindings[index], stage, index, 128);
fillStageBindings(bindings[index], stage, index);
++index;
}
@ -956,7 +1043,7 @@ Cache::Cache(Device *device, int vmId) : mDevice(device), mVmIm(vmId) {
{
VkDescriptorSetLayoutBinding bindings[kDescriptorBindings.size()];
fillStageBindings(bindings, VK_SHADER_STAGE_COMPUTE_BIT, 0, 128);
fillStageBindings(bindings, VK_SHADER_STAGE_COMPUTE_BIT, 0);
VkDescriptorSetLayoutCreateInfo layoutInfo{
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,

View file

@ -151,11 +151,13 @@ struct Cache {
struct Image {
VkImage handle;
VkImageSubresourceRange subresource;
};
struct ImageView {
VkImageView handle;
VkImage imageHandle;
VkImageSubresourceRange subresource;
};
class Tag {
@ -185,6 +187,8 @@ struct Cache {
mScheduler->wait();
}
Scheduler &getScheduler() const { return *mScheduler; }
~Tag() { release(); }
TagId getReadId() const { return TagId{std::uint64_t(mTagId) - 1}; }

View file

@ -1,4 +1,5 @@
#include "Device.hpp"
#include "FlipPipeline.hpp"
#include "Renderer.hpp"
#include "amdgpu/tiler.hpp"
#include "gnm/constants.hpp"
@ -255,27 +256,28 @@ bool Device::flip(std::int64_t pid, int bufferIndex, std::uint64_t arg,
gnm::DataFormat dfmt;
gnm::NumericFormat nfmt;
CbCompSwap compSwap;
auto flipType = FlipType::Alt;
switch (bufferAttr.pixelFormat) {
case 0x80000000:
// bgra
dfmt = gnm::kDataFormat8_8_8_8;
nfmt = gnm::kNumericFormatSNormNoZero;
compSwap = CbCompSwap::Alt;
nfmt = gnm::kNumericFormatSrgb;
break;
case 0x80002200:
// rgba
dfmt = gnm::kDataFormat8_8_8_8;
nfmt = gnm::kNumericFormatSNormNoZero;
compSwap = CbCompSwap::Std;
nfmt = gnm::kNumericFormatUNorm;
flipType = FlipType::Std;
break;
case 0x88740000:
case 0x88060000:
// bgra
dfmt = gnm::kDataFormat2_10_10_10;
nfmt = gnm::kNumericFormatSNormNoZero;
compSwap = CbCompSwap::Alt;
nfmt = gnm::kNumericFormatUNorm;
break;
case 0xc1060000:
dfmt = gnm::kDataFormat16_16_16_16;
nfmt = gnm::kNumericFormatSrgb;
break;
default:
@ -291,92 +293,28 @@ bool Device::flip(std::int64_t pid, int bufferIndex, std::uint64_t arg,
auto cacheTag = getCacheTag(process.vmId, scheduler);
if (false) {
transitionImageLayout(commandBuffer, swapchainImage,
VK_IMAGE_LAYOUT_UNDEFINED,
VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
{
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
.levelCount = 1,
.layerCount = 1,
});
transitionImageLayout(commandBuffer, swapchainImage,
VK_IMAGE_LAYOUT_UNDEFINED,
VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
{
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
.levelCount = 1,
.layerCount = 1,
});
amdgpu::flip(
cacheTag, commandBuffer, vk::context->swapchainExtent, buffer.address,
swapchainImageView, {bufferAttr.width, bufferAttr.height}, compSwap,
getDefaultTileModes()[bufferAttr.tilingMode == 1 ? 10 : 8], dfmt, nfmt);
amdgpu::flip(
cacheTag, commandBuffer, vk::context->swapchainExtent, buffer.address,
swapchainImageView, {bufferAttr.width, bufferAttr.height}, flipType,
getDefaultTileModes()[bufferAttr.tilingMode == 1 ? 10 : 8], dfmt, nfmt);
transitionImageLayout(commandBuffer, swapchainImage,
VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
VK_IMAGE_LAYOUT_PRESENT_SRC_KHR,
{
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
.levelCount = 1,
.layerCount = 1,
});
} else {
ImageKey frameKey{
.readAddress = buffer.address,
.type = gnm::TextureType::Dim2D,
.dfmt = dfmt,
.nfmt = nfmt,
.tileMode = getDefaultTileModes()[bufferAttr.tilingMode == 1 ? 10 : 8],
.extent =
{
.width = bufferAttr.width,
.height = bufferAttr.height,
.depth = 1,
},
.pitch = bufferAttr.width,
.mipCount = 1,
.arrayLayerCount = 1,
};
auto image = cacheTag.getImage(frameKey, Access::Read);
scheduler.submit();
scheduler.wait();
transitionImageLayout(commandBuffer, swapchainImage,
VK_IMAGE_LAYOUT_UNDEFINED,
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
{
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
.levelCount = 1,
.layerCount = 1,
});
VkImageBlit region{
.srcSubresource = {.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
.mipLevel = 0,
.baseArrayLayer = 0,
.layerCount = 1},
.srcOffsets = {{},
{static_cast<int32_t>(bufferAttr.width),
static_cast<int32_t>(bufferAttr.height), 1}},
.dstSubresource = {.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
.mipLevel = 0,
.baseArrayLayer = 0,
.layerCount = 1},
.dstOffsets =
{{},
{static_cast<int32_t>(vk::context->swapchainExtent.width),
static_cast<int32_t>(vk::context->swapchainExtent.height), 1}},
};
vkCmdBlitImage(commandBuffer, image.handle, VK_IMAGE_LAYOUT_GENERAL,
swapchainImage, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1,
&region, VK_FILTER_LINEAR);
transitionImageLayout(commandBuffer, swapchainImage,
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
VK_IMAGE_LAYOUT_PRESENT_SRC_KHR,
{
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
.levelCount = 1,
.layerCount = 1,
});
}
transitionImageLayout(commandBuffer, swapchainImage,
VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
VK_IMAGE_LAYOUT_PRESENT_SRC_KHR,
{
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
.levelCount = 1,
.layerCount = 1,
});
auto submitCompleteTask = scheduler.createExternalSubmit();

View file

@ -3,7 +3,7 @@
#include "Pipe.hpp"
#include "amdgpu/bridge/bridge.hpp"
#include "amdgpu/tiler_vulkan.hpp"
#include "gnm/descriptors.hpp"
#include "FlipPipeline.hpp"
#include "rx/MemoryTable.hpp"
#include "shader/SemanticInfo.hpp"
#include "shader/SpvConverter.hpp"
@ -52,9 +52,9 @@ struct Device {
Registers::Config config;
GpuTiler tiler;
GraphicsPipe graphicsPipes[kGfxPipeCount]{0, 1};
// ComputePipe computePipes[kComputePipeCount]{0, 1, 2, 3, 4, 5, 6, 7};
FlipPipeline flipPipeline;
int dmemFd[3] = {-1, -1, -1};
std::unordered_map<std::int64_t, ProcessInfo> processInfo;

rpcsx-gpu2/FlipPipeline.cpp (new file, 309 lines)
View file

@ -0,0 +1,309 @@
#include "FlipPipeline.hpp"
#include "shaders/flip.vert.h"
#include "shaders/flip_alt.frag.h"
#include "shaders/flip_std.frag.h"
#include "vk.hpp"
#include <atomic>
#include <vulkan/vulkan_core.h>
FlipPipeline::~FlipPipeline() {
vkDestroyPipeline(vk::context->device, pipelines[0], vk::context->allocator);
vkDestroyPipeline(vk::context->device, pipelines[1], vk::context->allocator);
vkDestroyPipelineLayout(vk::context->device, pipelineLayout,
vk::context->allocator);
vkDestroyDescriptorPool(vk::context->device, descriptorPool,
vk::context->allocator);
vkDestroyDescriptorSetLayout(vk::context->device, descriptorSetLayout,
vk::context->allocator);
vkDestroyShaderModule(vk::context->device, flipVertShaderModule,
vk::context->allocator);
vkDestroyShaderModule(vk::context->device, flipFragStdShaderModule,
vk::context->allocator);
vkDestroyShaderModule(vk::context->device, flipFragAltShaderModule,
vk::context->allocator);
}
FlipPipeline::FlipPipeline() {
VkShaderModuleCreateInfo flipVertexModuleInfo{
.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
.codeSize = std::size(spirv_flip_vert) * sizeof(*spirv_flip_std_frag),
.pCode = spirv_flip_vert,
};
VkShaderModuleCreateInfo flipFragmentStdInfo{
.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
.codeSize = std::size(spirv_flip_std_frag) * sizeof(*spirv_flip_std_frag),
.pCode = spirv_flip_std_frag,
};
VkShaderModuleCreateInfo flipFragmentAltInfo{
.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
.codeSize = std::size(spirv_flip_alt_frag) * sizeof(*spirv_flip_std_frag),
.pCode = spirv_flip_alt_frag,
};
VK_VERIFY(vkCreateShaderModule(vk::context->device, &flipVertexModuleInfo,
vk::context->allocator,
&flipVertShaderModule));
VK_VERIFY(vkCreateShaderModule(vk::context->device, &flipFragmentStdInfo,
vk::context->allocator,
&flipFragStdShaderModule));
VK_VERIFY(vkCreateShaderModule(vk::context->device, &flipFragmentAltInfo,
vk::context->allocator,
&flipFragAltShaderModule));
{
VkDescriptorSetLayoutBinding bindings[] = {
{
.binding = 0,
.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
.descriptorCount = 1,
.stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
},
{
.binding = 1,
.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLER,
.descriptorCount = 1,
.stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
},
};
VkDescriptorSetLayoutCreateInfo descriptorSetLayoutCreateInfo = {
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
.bindingCount = std::size(bindings),
.pBindings = bindings,
};
vkCreateDescriptorSetLayout(vk::context->device,
&descriptorSetLayoutCreateInfo,
vk::context->allocator, &descriptorSetLayout);
}
{
VkPipelineLayoutCreateInfo pipelineLayoutCreateInfo = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
.setLayoutCount = 1,
.pSetLayouts = &descriptorSetLayout,
};
VK_VERIFY(vkCreatePipelineLayout(vk::context->device,
&pipelineLayoutCreateInfo,
vk::context->allocator, &pipelineLayout));
}
{
VkPipelineShaderStageCreateInfo stagesStd[]{
{
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
// .pNext = &flipVertexModuleInfo,
.stage = VK_SHADER_STAGE_VERTEX_BIT,
.module = flipVertShaderModule,
.pName = "main",
},
{
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
// .pNext = &flipFragmentStdInfo,
.stage = VK_SHADER_STAGE_FRAGMENT_BIT,
.module = flipFragStdShaderModule,
.pName = "main",
}};
VkPipelineShaderStageCreateInfo stagesAlt[]{
{
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
// .pNext = &flipVertexModuleInfo,
.stage = VK_SHADER_STAGE_VERTEX_BIT,
.module = flipVertShaderModule,
.pName = "main",
},
{
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
// .pNext = &flipFragmentAltInfo,
.stage = VK_SHADER_STAGE_FRAGMENT_BIT,
.module = flipFragAltShaderModule,
.pName = "main",
}};
VkPipelineVertexInputStateCreateInfo vertexInputState{
.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
};
VkPipelineInputAssemblyStateCreateInfo inputAssemblyState{
.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST,
};
VkPipelineTessellationStateCreateInfo tessellationState{
.sType = VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO,
};
VkPipelineRasterizationStateCreateInfo rasterizationState{
.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
};
VkSampleMask sampleMask = -1;
VkPipelineMultisampleStateCreateInfo multisampleState{
.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT,
.pSampleMask = &sampleMask,
};
VkPipelineDepthStencilStateCreateInfo depthStencilState{
.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
};
VkPipelineColorBlendStateCreateInfo colorBlendState{
.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
};
VkDynamicState dynamicStates[] = {
VK_DYNAMIC_STATE_VIEWPORT_WITH_COUNT,
VK_DYNAMIC_STATE_SCISSOR_WITH_COUNT,
};
VkPipelineDynamicStateCreateInfo dynamicState{
.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
.dynamicStateCount = std::size(dynamicStates),
.pDynamicStates = dynamicStates,
};
VkGraphicsPipelineCreateInfo pipelineCreateInfos[]{
{
.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
.stageCount = std::size(stagesStd),
.pStages = stagesStd,
.pVertexInputState = &vertexInputState,
.pInputAssemblyState = &inputAssemblyState,
.pTessellationState = &tessellationState,
.pRasterizationState = &rasterizationState,
.pMultisampleState = &multisampleState,
.pDepthStencilState = &depthStencilState,
.pColorBlendState = &colorBlendState,
.pDynamicState = &dynamicState,
.layout = pipelineLayout,
},
{
.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
.stageCount = std::size(stagesAlt),
.pStages = stagesAlt,
.pVertexInputState = &vertexInputState,
.pInputAssemblyState = &inputAssemblyState,
.pTessellationState = &tessellationState,
.pRasterizationState = &rasterizationState,
.pMultisampleState = &multisampleState,
.pDepthStencilState = &depthStencilState,
.pColorBlendState = &colorBlendState,
.pDynamicState = &dynamicState,
.layout = pipelineLayout,
},
};
VK_VERIFY(vkCreateGraphicsPipelines(
vk::context->device, VK_NULL_HANDLE, std::size(pipelines),
pipelineCreateInfos, vk::context->allocator, pipelines));
}
{
VkDescriptorPoolSize poolSizes[]{
{
.type = VK_DESCRIPTOR_TYPE_SAMPLER,
.descriptorCount =
static_cast<std::uint32_t>(std::size(descriptorSets) * 2),
},
{
.type = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
.descriptorCount =
static_cast<std::uint32_t>(std::size(descriptorSets) * 2),
}};
VkDescriptorPoolCreateInfo descriptorPoolCreateInfo{
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
.maxSets = static_cast<std::uint32_t>(std::size(descriptorSets) * 2),
.poolSizeCount = std::size(poolSizes),
.pPoolSizes = poolSizes,
};
VK_VERIFY(vkCreateDescriptorPool(vk::context->device,
&descriptorPoolCreateInfo,
vk::context->allocator, &descriptorPool));
}
for (auto &set : descriptorSets) {
VkDescriptorSetAllocateInfo descriptorSetAllocateInfo{
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
.descriptorPool = descriptorPool,
.descriptorSetCount = 1,
.pSetLayouts = &descriptorSetLayout,
};
VK_VERIFY(vkAllocateDescriptorSets(vk::context->device,
&descriptorSetAllocateInfo, &set));
}
}
void FlipPipeline::bind(Scheduler &sched, FlipType type, VkImageView imageView,
VkSampler sampler) {
auto cmdBuffer = sched.getCommandBuffer();
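// Grab a free descriptor set slot: find the lowest clear bit in the in-use mask and claim it with a CAS.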
auto allocateDescriptorSetIndex = [this] {
auto mask = freeDescriptorSets.load(std::memory_order::acquire);
while (true) {
auto index = std::countr_one(mask);
if (index >= std::size(descriptorSets)) {
mask = freeDescriptorSets.load(std::memory_order::relaxed);
continue;
}
if (!freeDescriptorSets.compare_exchange_weak(
mask, mask | (1 << index), std::memory_order::release,
std::memory_order::relaxed)) {
continue;
}
return index;
}
};
auto descriptorIndex = allocateDescriptorSetIndex();
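// Once the submission completes, clear the slot's bit so the descriptor set can be reused.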
sched.afterSubmit([this, descriptorIndex] {
decltype(freeDescriptorSets)::value_type mask = 1 << descriptorIndex;
while (!freeDescriptorSets.compare_exchange_weak(
mask, mask & ~(1 << descriptorIndex), std::memory_order::release,
std::memory_order::acquire)) {
}
});
auto descriptorSet = descriptorSets[descriptorIndex];
VkDescriptorImageInfo imageInfo = {
.sampler = sampler,
.imageView = imageView,
.imageLayout = VK_IMAGE_LAYOUT_GENERAL,
};
VkWriteDescriptorSet writeDescSets[]{
{
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
.dstSet = descriptorSet,
.dstBinding = 0,
.descriptorCount = 1,
.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
.pImageInfo = &imageInfo,
},
{
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
.dstSet = descriptorSet,
.dstBinding = 1,
.descriptorCount = 1,
.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLER,
.pImageInfo = &imageInfo,
},
};
vkUpdateDescriptorSets(vk::context->device, std::size(writeDescSets),
writeDescSets, 0, nullptr);
vkCmdBindDescriptorSets(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS,
pipelineLayout, 0, 1, &descriptorSet, 0, nullptr);
vkCmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS,
pipelines[static_cast<int>(type)]);
}

View file

@ -0,0 +1,30 @@
#pragma once
#include "Scheduler.hpp"
#include <atomic>
#include <cstdint>
#include <vulkan/vulkan.h>
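// Shader variant used for presentation: Std passes components through, Alt swaps red/blue for BGRA scanout formats.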
enum class FlipType {
Std,
Alt,
};
struct FlipPipeline {
VkShaderModule flipVertShaderModule{};
VkShaderModule flipFragStdShaderModule{};
VkShaderModule flipFragAltShaderModule{};
VkPipelineLayout pipelineLayout{};
VkDescriptorSetLayout descriptorSetLayout{};
VkPipeline pipelines[2]{};
VkDescriptorPool descriptorPool{};
VkDescriptorSet descriptorSets[8]{};
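// Bitmask of descriptor sets currently in flight; a set bit means the corresponding slot is busy.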
std::atomic<std::uint8_t> freeDescriptorSets{0};
FlipPipeline(const FlipPipeline &) = delete;
FlipPipeline();
~FlipPipeline();
void bind(Scheduler &sched, FlipType type, VkImageView imageView,
VkSampler sampler);
};

View file

@ -12,8 +12,6 @@
#include <shader/dialect.hpp>
#include <shader/gcn.hpp>
#include <shaders/fill_red.frag.h>
#include <shaders/flip.frag.h>
#include <shaders/flip.vert.h>
#include <shaders/rect_list.geom.h>
#include <bit>
@ -114,50 +112,6 @@ static VkShaderEXT getFillRedFragShader(amdgpu::Cache &cache) {
return shader;
}
static VkShaderEXT getFlipVertexShader(amdgpu::Cache &cache) {
static VkShaderEXT shader = VK_NULL_HANDLE;
if (shader != VK_NULL_HANDLE) {
return shader;
}
VkShaderCreateInfoEXT createInfo{
.sType = VK_STRUCTURE_TYPE_SHADER_CREATE_INFO_EXT,
.stage = VK_SHADER_STAGE_VERTEX_BIT,
.codeType = VK_SHADER_CODE_TYPE_SPIRV_EXT,
.codeSize = sizeof(spirv_flip_vert),
.pCode = spirv_flip_vert,
.pName = "main",
.setLayoutCount =
static_cast<uint32_t>(cache.getGraphicsDescriptorSetLayouts().size()),
.pSetLayouts = cache.getGraphicsDescriptorSetLayouts().data()};
VK_VERIFY(vk::CreateShadersEXT(vk::context->device, 1, &createInfo,
vk::context->allocator, &shader));
return shader;
}
static VkShaderEXT getFlipFragmentShader(amdgpu::Cache &cache) {
static VkShaderEXT shader = VK_NULL_HANDLE;
if (shader != VK_NULL_HANDLE) {
return shader;
}
VkShaderCreateInfoEXT createInfo{
.sType = VK_STRUCTURE_TYPE_SHADER_CREATE_INFO_EXT,
.stage = VK_SHADER_STAGE_FRAGMENT_BIT,
.codeType = VK_SHADER_CODE_TYPE_SPIRV_EXT,
.codeSize = sizeof(spirv_flip_frag),
.pCode = spirv_flip_frag,
.pName = "main",
.setLayoutCount =
static_cast<uint32_t>(cache.getGraphicsDescriptorSetLayouts().size()),
.pSetLayouts = cache.getGraphicsDescriptorSetLayouts().data()};
VK_VERIFY(vk::CreateShadersEXT(vk::context->device, 1, &createInfo,
vk::context->allocator, &shader));
return shader;
}
static VkPrimitiveTopology toVkPrimitiveType(gnm::PrimitiveType type) {
switch (type) {
case gnm::PrimitiveType::PointList:
@ -728,7 +682,7 @@ void amdgpu::draw(GraphicsPipe &pipe, int vmId, std::uint32_t firstVertex,
.vgprCount = pgm.rsrc1.getVGprCount(),
.sgprCount = pgm.rsrc1.getSGprCount(),
.userSgprs = std::span(pgm.userData.data(), pgm.rsrc2.userSgpr),
// .supportsBarycentric = vk::context->supportsBarycentric,
.supportsBarycentric = vk::context->supportsBarycentric,
.supportsInt8 = vk::context->supportsInt8,
.supportsInt64Atomics = vk::context->supportsInt64Atomics,
};
@ -1162,50 +1116,20 @@ transitionImageLayout(VkCommandBuffer commandBuffer, VkImage image,
void amdgpu::flip(Cache::Tag &cacheTag, VkCommandBuffer commandBuffer,
VkExtent2D targetExtent, std::uint64_t address,
VkImageView target, VkExtent2D imageExtent,
CbCompSwap compSwap, TileMode tileMode, gnm::DataFormat dfmt,
FlipType type, TileMode tileMode, gnm::DataFormat dfmt,
gnm::NumericFormat nfmt) {
auto pipelineLayout = cacheTag.getGraphicsPipelineLayout();
auto descriptorSets = cacheTag.createGraphicsDescriptorSets();
ImageViewKey framebuffer{};
framebuffer.type = gnm::TextureType::Dim2D;
framebuffer.pitch = imageExtent.width;
framebuffer.readAddress = address;
framebuffer.type = gnm::TextureType::Dim2D;
framebuffer.dfmt = dfmt;
framebuffer.nfmt = nfmt;
framebuffer.tileMode = tileMode;
framebuffer.extent.width = imageExtent.width;
framebuffer.extent.height = imageExtent.height;
framebuffer.extent.depth = 1;
framebuffer.dfmt = dfmt;
framebuffer.nfmt = nfmt;
framebuffer.pitch = imageExtent.width;
framebuffer.mipCount = 1;
framebuffer.arrayLayerCount = 1;
framebuffer.tileMode = tileMode;
switch (compSwap) {
case CbCompSwap::Std:
framebuffer.R = gnm::Swizzle::R;
framebuffer.G = gnm::Swizzle::G;
framebuffer.B = gnm::Swizzle::B;
framebuffer.A = gnm::Swizzle::A;
break;
case CbCompSwap::Alt:
framebuffer.R = gnm::Swizzle::B;
framebuffer.G = gnm::Swizzle::G;
framebuffer.B = gnm::Swizzle::R;
framebuffer.A = gnm::Swizzle::A;
break;
case CbCompSwap::StdRev:
framebuffer.R = gnm::Swizzle::A;
framebuffer.G = gnm::Swizzle::B;
framebuffer.B = gnm::Swizzle::G;
framebuffer.A = gnm::Swizzle::R;
break;
case CbCompSwap::AltRev:
framebuffer.R = gnm::Swizzle::A;
framebuffer.G = gnm::Swizzle::R;
framebuffer.B = gnm::Swizzle::G;
framebuffer.A = gnm::Swizzle::B;
break;
}
SamplerKey framebufferSampler = {
.magFilter = VK_FILTER_LINEAR,
@ -1215,49 +1139,20 @@ void amdgpu::flip(Cache::Tag &cacheTag, VkCommandBuffer commandBuffer,
auto imageView = cacheTag.getImageView(framebuffer, Access::Read);
auto sampler = cacheTag.getSampler(framebufferSampler);
cacheTag.submitAndWait();
VkDescriptorImageInfo imageInfo{
.sampler = sampler.handle,
.imageView = imageView.handle,
.imageLayout = VK_IMAGE_LAYOUT_GENERAL,
};
VkWriteDescriptorSet writeDescSet[]{
{
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
.dstSet = descriptorSets[0],
.dstBinding =
Cache::getDescriptorBinding(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, 2),
.descriptorCount = 1,
.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
.pImageInfo = &imageInfo,
},
{
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
.dstSet = descriptorSets[0],
.dstBinding = Cache::getDescriptorBinding(VK_DESCRIPTOR_TYPE_SAMPLER),
.descriptorCount = 1,
.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLER,
.pImageInfo = &imageInfo,
}};
vkUpdateDescriptorSets(vk::context->device, std::size(writeDescSet),
writeDescSet, 0, nullptr);
VkRenderingAttachmentInfo colorAttachments[1]{{
.sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO,
.imageView = target,
.imageLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
.loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR,
.storeOp = VK_ATTACHMENT_STORE_OP_STORE,
.clearValue = {},
}};
VkBool32 colorBlendEnable[1]{VK_FALSE};
VkColorBlendEquationEXT colorBlendEquation[1]{};
VkColorComponentFlags colorWriteMask[1]{
VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT |
VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT};
VkViewport viewPorts[1]{
{
.width = float(targetExtent.width),
@ -1282,87 +1177,16 @@ void amdgpu::flip(Cache::Tag &cacheTag, VkCommandBuffer commandBuffer,
.pColorAttachments = colorAttachments,
};
commandBuffer = cacheTag.getScheduler().getCommandBuffer();
vkCmdBeginRendering(commandBuffer, &renderInfo);
vkCmdSetRasterizerDiscardEnable(commandBuffer, VK_FALSE);
cacheTag.getDevice()->flipPipeline.bind(cacheTag.getScheduler(), type, imageView.handle, sampler.handle);
vkCmdSetViewportWithCount(commandBuffer, 1, viewPorts);
vkCmdSetScissorWithCount(commandBuffer, 1, viewPortScissors);
vk::CmdSetColorBlendEnableEXT(commandBuffer, 0, 1, colorBlendEnable);
vk::CmdSetColorBlendEquationEXT(commandBuffer, 0, 1, colorBlendEquation);
vk::CmdSetDepthClampEnableEXT(commandBuffer, VK_FALSE);
vkCmdSetDepthTestEnable(commandBuffer, VK_FALSE);
vkCmdSetDepthWriteEnable(commandBuffer, VK_FALSE);
vkCmdSetDepthBounds(commandBuffer, 0.0f, 1.0f);
vkCmdSetDepthBoundsTestEnable(commandBuffer, VK_FALSE);
vkCmdSetDepthBiasEnable(commandBuffer, VK_FALSE);
vkCmdSetDepthBias(commandBuffer, 0, 1, 1);
vkCmdSetPrimitiveRestartEnable(commandBuffer, VK_FALSE);
vk::CmdSetAlphaToOneEnableEXT(commandBuffer, VK_FALSE);
vk::CmdSetLogicOpEnableEXT(commandBuffer, VK_FALSE);
vk::CmdSetLogicOpEXT(commandBuffer, VK_LOGIC_OP_AND);
vk::CmdSetPolygonModeEXT(commandBuffer, VK_POLYGON_MODE_FILL);
vk::CmdSetRasterizationSamplesEXT(commandBuffer, VK_SAMPLE_COUNT_1_BIT);
VkSampleMask sampleMask = ~0;
vk::CmdSetSampleMaskEXT(commandBuffer, VK_SAMPLE_COUNT_1_BIT, &sampleMask);
vk::CmdSetTessellationDomainOriginEXT(
commandBuffer, VK_TESSELLATION_DOMAIN_ORIGIN_LOWER_LEFT);
vk::CmdSetAlphaToCoverageEnableEXT(commandBuffer, VK_FALSE);
vk::CmdSetVertexInputEXT(commandBuffer, 0, nullptr, 0, nullptr);
vk::CmdSetColorWriteMaskEXT(commandBuffer, 0, 1, colorWriteMask);
vkCmdSetStencilCompareMask(commandBuffer, VK_STENCIL_FACE_FRONT_AND_BACK, 0);
vkCmdSetStencilWriteMask(commandBuffer, VK_STENCIL_FACE_FRONT_AND_BACK, 0);
vkCmdSetStencilReference(commandBuffer, VK_STENCIL_FACE_FRONT_AND_BACK, 0);
vkCmdSetCullMode(commandBuffer, VK_CULL_MODE_NONE);
vkCmdSetFrontFace(commandBuffer, VK_FRONT_FACE_CLOCKWISE);
vkCmdSetPrimitiveTopology(commandBuffer, VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST);
vkCmdSetStencilTestEnable(commandBuffer, VK_FALSE);
auto stages = Cache::kGraphicsStages;
VkShaderEXT shaders[stages.size()]{};
shaders[Cache::getStageIndex(VK_SHADER_STAGE_VERTEX_BIT)] =
getFlipVertexShader(*cacheTag.getCache());
shaders[Cache::getStageIndex(VK_SHADER_STAGE_FRAGMENT_BIT)] =
getFlipFragmentShader(*cacheTag.getCache());
vkCmdBindDescriptorSets(commandBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS,
pipelineLayout, 0, descriptorSets.size(),
descriptorSets.data(), 0, nullptr);
vk::CmdBindShadersEXT(commandBuffer, stages.size(), stages.data(), shaders);
vkCmdDraw(commandBuffer, 6, 1, 0, 0);
vkCmdEndRendering(commandBuffer);
// {
// VkImageMemoryBarrier barrier{
// .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
// .srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_READ_BIT,
// .dstAccessMask = VK_ACCESS_NONE,
// .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
// .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
// .image = imageView.imageHandle,
// .subresourceRange =
// {
// .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
// .levelCount = 1,
// .layerCount = 1,
// },
// };
// vkCmdPipelineBarrier(commandBuffer,
// VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
// VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, 0, 0, nullptr,
// 0, nullptr, 1, &barrier);
// }
cacheTag.getScheduler().submit();
}

View file

@ -1,6 +1,7 @@
#pragma once
#include "Cache.hpp"
#include "FlipPipeline.hpp"
#include "Pipe.hpp"
#include <cstdint>
#include <vulkan/vulkan_core.h>
@ -12,6 +13,6 @@ void draw(GraphicsPipe &pipe, int vmId, std::uint32_t firstVertex,
std::uint32_t indexCount);
void flip(Cache::Tag &cacheTag, VkCommandBuffer commandBuffer,
VkExtent2D targetExtent, std::uint64_t address, VkImageView target,
VkExtent2D imageExtent, CbCompSwap compSwap, TileMode tileMode,
VkExtent2D imageExtent, FlipType type, TileMode tileMode,
gnm::DataFormat dfmt, gnm::NumericFormat nfmt);
} // namespace amdgpu

View file

@ -190,9 +190,8 @@ void amdgpu::GpuTiler::detile(Scheduler &scheduler,
configOffset);
auto &subresource = info.getSubresourceInfo(mipLevel);
config->srcAddress = srcTiledAddress + subresource.offset +
(subresource.tiledSize * baseArray);
config->dstAddress = dstLinearAddress + (subresource.linearSize * baseArray);
config->srcAddress = srcTiledAddress + subresource.offset;
config->dstAddress = dstLinearAddress;
config->dataWidth = subresource.dataWidth;
config->dataHeight = subresource.dataHeight;
config->tileMode = tileMode.raw;
@ -287,9 +286,8 @@ void amdgpu::GpuTiler::tile(Scheduler &scheduler,
configOffset);
auto &subresource = info.getSubresourceInfo(mipLevel);
config->srcAddress = srcLinearAddress + subresource.offset +
subresource.linearSize * baseArray;
config->dstAddress = dstTiledAddress;
config->srcAddress = srcLinearAddress;
config->dstAddress = dstTiledAddress + subresource.offset;
config->dataWidth = subresource.dataWidth;
config->dataHeight = subresource.dataHeight;
config->tileMode = tileMode.raw;

View file

@ -2542,6 +2542,22 @@ int findTexture3DIndex(int32_t textureIndexHint, uint32_t tbuffer[8]) {
return textureIndexHint;
}
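// Map a destination-select value onto a sampled component: 0 -> 0.0, 1 -> 1.0, 4..7 -> x/y/z/w.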
float32_t swizzle(f32vec4 comp, int sel) {
switch (sel) {
case 0: return 0;
case 1: return 1;
case 4: return comp.x;
case 5: return comp.y;
case 6: return comp.z;
case 7: return comp.w;
}
return 1;
}
f32vec4 swizzle(f32vec4 comp, int selX, int selY, int selZ, int selW) {
return f32vec4(swizzle(comp, selX), swizzle(comp, selY), swizzle(comp, selZ), swizzle(comp, selW));
}
void image_sample(inout f32vec4 vdata, f32vec3 vaddr, int32_t textureIndexHint, uint32_t tbuffer[8], int32_t samplerIndexHint, u32vec4 ssampler, uint32_t dmask) {
uint8_t textureType = tbuffer_type(tbuffer);
@ -2582,6 +2598,14 @@ void image_sample(inout f32vec4 vdata, f32vec3 vaddr, int32_t textureIndexHint,
// debugPrintfEXT("image_sample: textureType: %u, coord: %v3f, result: %v4f, dmask: %u", textureType, vaddr, result, dmask);
result = swizzle(result,
tbuffer_dst_sel_x(tbuffer),
tbuffer_dst_sel_y(tbuffer),
tbuffer_dst_sel_z(tbuffer),
tbuffer_dst_sel_w(tbuffer));
int vdataIndex = 0;
for (int i = 0; i < 4; ++i) {
if ((dmask & (1 << i)) != 0) {

View file

@ -263,7 +263,7 @@ int main(int argc, const char *argv[]) {
glfwInit();
glfwWindowHint(GLFW_CLIENT_API, GLFW_NO_API);
auto window = glfwCreateWindow(1280, 720, "RPCSX", nullptr, nullptr);
auto window = glfwCreateWindow(1920, 1080, "RPCSX", nullptr, nullptr);
rx::atScopeExit _{[window] { glfwDestroyWindow(window); }};

View file

@ -1,11 +0,0 @@
#version 450
layout(location = 0) in vec2 coord;
layout(location = 0) out vec4 color;
layout(set = 0, binding = 1) uniform sampler samp[];
layout(set = 0, binding = 3) uniform texture2D tex[];
void main()
{
color = vec4(texture(sampler2D(tex[0], samp[0]), coord.xy).xyz, 1);
}

View file

@ -2,7 +2,6 @@
layout(location = 0) out vec2 coord;
void main()
{
float x = float(((gl_VertexIndex + 2) / 3) & 1) * 2 - 1;

View file

@ -0,0 +1,11 @@
#version 450
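// Samples the frame and swizzles the output to BGRA.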
layout(location = 0) in vec2 coord;
layout(location = 0) out vec4 color;
layout(binding = 0) uniform texture2D tex;
layout(binding = 1) uniform sampler samp;
void main()
{
color = vec4(texture(sampler2D(tex, samp), coord.xy).xyz, 1).bgra;
}

View file

@ -0,0 +1,11 @@
#version 450
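// Samples the frame and writes it out unchanged, in RGBA order.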
layout(location = 0) in vec2 coord;
layout(location = 0) out vec4 color;
layout(binding = 0) uniform texture2D tex;
layout(binding = 1) uniform sampler samp;
void main()
{
color = vec4(texture(sampler2D(tex, samp), coord.xy).xyz, 1).rgba;
}