gpu: implement image buffer

This commit is contained in:
DH 2024-10-20 23:32:59 +03:00
parent deb09371bc
commit 0287d42aa5
6 changed files with 826 additions and 348 deletions

File diff suppressed because it is too large Load diff

View file

@ -28,6 +28,30 @@ struct ShaderKey {
enum class ImageKind : std::uint8_t { Color, Depth, Stencil };
// Key describing a view of an image. It is accepted by the
// getImageView(const ImageViewKey &, Access) overload and can be turned into an
// ImageKey through ImageKey::createFrom(const ImageViewKey &).
//
// Only the members that carry a default initializer are set automatically.
// readAddress, writeAddress, type, dfmt and nfmt have none, so they are
// indeterminate unless the object is value-initialized (`ImageViewKey key{};`)
// or every one of them is assigned explicitly.
struct ImageViewKey {
// Addresses the image is read from and written to.
std::uint64_t readAddress;
std::uint64_t writeAddress;
// Texture dimensionality plus data / numeric format of the texels.
gnm::TextureType type;
gnm::DataFormat dfmt;
gnm::NumericFormat nfmt;
TileMode tileMode = {};
// Origin and size of the viewed region; defaults to a single texel at (0, 0, 0).
VkOffset3D offset = {0, 0, 0};
VkExtent3D extent = {1, 1, 1};
// NOTE(review): units of pitch (texels vs. bytes) are not visible here — confirm.
std::uint32_t pitch = 1;
// Mip and array-layer sub-range; defaults select the first level of the first layer.
unsigned baseMipLevel = 0;
unsigned mipCount = 1;
unsigned baseArrayLayer = 0;
unsigned arrayLayerCount = 1;
// Whether the image is used as a color, depth or stencil image.
ImageKind kind = ImageKind::Color;
// NOTE(review): presumably "dimensions are padded to a power of two" — confirm.
bool pow2pad = false;
// Per-channel swizzle; by default each channel maps to itself.
gnm::Swizzle r = gnm::Swizzle::R;
gnm::Swizzle g = gnm::Swizzle::G;
gnm::Swizzle b = gnm::Swizzle::B;
gnm::Swizzle a = gnm::Swizzle::A;
// Builds a key from a texture buffer descriptor.
static ImageViewKey createFrom(const gnm::TBuffer &tbuffer);
};
struct ImageKey {
std::uint64_t readAddress;
std::uint64_t writeAddress;
@ -45,6 +69,24 @@ struct ImageKey {
bool pow2pad = false;
static ImageKey createFrom(const gnm::TBuffer &tbuffer);
static ImageKey createFrom(const ImageViewKey &imageView);
};
// Key describing an image buffer; accepted by
// getImageBuffer(const ImageBufferKey &, Access).
//
// Unlike ImageViewKey it carries a single address and no numeric format,
// image kind or swizzle. address, type and dfmt have no default initializer,
// so they are indeterminate unless the object is value-initialized
// (`ImageBufferKey key{};`) or they are assigned explicitly.
struct ImageBufferKey {
std::uint64_t address;
// Texture dimensionality and data format of the texels.
gnm::TextureType type;
gnm::DataFormat dfmt;
TileMode tileMode = {};
// Image size; defaults to a single texel.
VkExtent3D extent = {1, 1, 1};
// NOTE(review): units of pitch (texels vs. bytes) are not visible here — confirm.
std::uint32_t pitch = 1;
// Mip and array-layer sub-range; defaults select the first level of the first layer.
unsigned baseMipLevel = 0;
unsigned mipCount = 1;
unsigned baseArrayLayer = 0;
unsigned arrayLayerCount = 1;
// NOTE(review): presumably "dimensions are padded to a power of two" — confirm.
bool pow2pad = false;
// Builds a key from a texture buffer descriptor.
static ImageBufferKey createFrom(const gnm::TBuffer &tbuffer);
// Builds a key from an existing image key.
static ImageBufferKey createFrom(const ImageKey &imageKey);
};
struct SamplerKey {
@ -74,6 +116,7 @@ struct Cache {
HostVisibleBuffer,
DeviceLocalBuffer,
IndexBuffer,
ImageBuffer,
Image,
Shader,
@ -144,6 +187,13 @@ struct Cache {
std::byte *data;
};
// Value returned by getImageBuffer(): a Vulkan buffer plus where the image
// data can be found in it. Only `handle` has a default; the other members are
// indeterminate until assigned.
struct ImageBuffer {
VkBuffer handle = VK_NULL_HANDLE;
// NOTE(review): presumably the offset of the image data inside `handle` — confirm.
std::uint64_t offset;
// NOTE(review): presumably the device address of the data (handle base + offset) — confirm.
std::uint64_t deviceAddress;
TagId tagId;
};
struct IndexBuffer {
VkBuffer handle = VK_NULL_HANDLE;
std::uint64_t offset;
@ -240,6 +290,7 @@ private:
};
std::vector<std::shared_ptr<Entry>> mAcquiredImageResources;
std::vector<std::shared_ptr<Entry>> mAcquiredImageBufferResources;
std::vector<std::shared_ptr<Entry>> mAcquiredMemoryResources;
std::vector<std::shared_ptr<Entry>> mAcquiredViewResources;
std::vector<MemoryTableConfigSlot> memoryTableConfigSlots;
@ -251,6 +302,7 @@ private:
void clear() {
mAcquiredImageResources.clear();
mAcquiredImageBufferResources.clear();
mAcquiredMemoryResources.clear();
memoryTableConfigSlots.clear();
descriptorBuffers.clear();
@ -306,8 +358,9 @@ public:
std::uint32_t indexCount,
gnm::PrimitiveType primType,
gnm::IndexType indexType);
ImageBuffer getImageBuffer(const ImageBufferKey &key, Access access);
Image getImage(const ImageKey &key, Access access);
ImageView getImageView(const ImageKey &key, Access access);
ImageView getImageView(const ImageViewKey &key, Access access);
void readMemory(void *target, rx::AddressRange range);
void writeMemory(const void *source, rx::AddressRange range);
int compareMemory(const void *source, rx::AddressRange range);
@ -503,6 +556,7 @@ public:
auto &getTable(EntryType type) { return mTables[static_cast<int>(type)]; }
rx::AddressRange flushImages(Tag &tag, rx::AddressRange range);
rx::AddressRange flushImageBuffers(Tag &tag, rx::AddressRange range);
rx::AddressRange flushBuffers(rx::AddressRange range);
private:

View file

@ -224,9 +224,15 @@ Device::Device() : vkContext(createVkContext(this)) {
rx::AddressRange::fromBeginSize(address, rx::mem::pageSize);
auto tag = getCacheTag(vmId, sched);
tag.getCache()->flushImages(tag, range);
sched.submit();
sched.wait();
if (tag.getCache()->flushImages(tag, range)) {
sched.submit();
sched.wait();
}
if (tag.getCache()->flushImageBuffers(tag, range)) {
sched.submit();
sched.wait();
}
auto flushedRange = tag.getCache()->flushBuffers(range);

View file

@ -179,16 +179,22 @@ FlipPipeline::FlipPipeline() {
.pColorAttachmentFormats = &colorFormat,
};
VkPipelineViewportStateCreateInfo viewportState{
.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
.viewportCount = 1,
.scissorCount = 1,
};
VkGraphicsPipelineCreateInfo pipelineCreateInfos[]{
{
.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
.pNext = &info,
.flags = VK_PIPELINE_CREATE_DESCRIPTOR_BUFFER_BIT_EXT,
.stageCount = std::size(stagesStd),
.pStages = stagesStd,
.pVertexInputState = &vertexInputState,
.pInputAssemblyState = &inputAssemblyState,
.pTessellationState = &tessellationState,
.pViewportState = &viewportState,
.pRasterizationState = &rasterizationState,
.pMultisampleState = &multisampleState,
.pDepthStencilState = &depthStencilState,
@ -199,12 +205,12 @@ FlipPipeline::FlipPipeline() {
{
.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
.pNext = &info,
.flags = VK_PIPELINE_CREATE_DESCRIPTOR_BUFFER_BIT_EXT,
.stageCount = std::size(stagesAlt),
.pStages = stagesAlt,
.pVertexInputState = &vertexInputState,
.pInputAssemblyState = &inputAssemblyState,
.pTessellationState = &tessellationState,
.pViewportState = &viewportState,
.pRasterizationState = &rasterizationState,
.pMultisampleState = &multisampleState,
.pDepthStencilState = &depthStencilState,

View file

@ -6,6 +6,7 @@
#include <gnm/constants.hpp>
#include <gnm/vulkan.hpp>
#include <print>
#include <rx/format.hpp>
#include <shader/Evaluator.hpp>
#include <shader/dialect.hpp>
#include <shader/gcn.hpp>
@ -158,9 +159,10 @@ void amdgpu::draw(GraphicsPipe &pipe, int vmId, std::uint32_t firstVertex,
return;
}
if (pipe.context.cbColorControl.mode != gnm::CbMode::Normal) {
if (pipe.context.cbColorControl.mode != gnm::CbMode::Normal &&
pipe.context.cbColorControl.mode != gnm::CbMode::EliminateFastClear) {
std::println("unimplemented context.cbColorControl.mode = {}",
static_cast<int>(pipe.context.cbColorControl.mode));
static_cast<gnm::CbMode>(pipe.context.cbColorControl.mode));
return;
}
@ -242,14 +244,16 @@ void amdgpu::draw(GraphicsPipe &pipe, int vmId, std::uint32_t firstVertex,
auto vkViewPortScissor = gnm::toVkRect2D(viewPortScissor);
viewPortScissors[renderTargets] = vkViewPortScissor;
ImageKey renderTargetInfo{};
ImageViewKey renderTargetInfo{};
renderTargetInfo.type = gnm::TextureType::Dim2D;
renderTargetInfo.pitch = vkViewPortScissor.extent.width;
renderTargetInfo.readAddress = static_cast<std::uint64_t>(cbColor.base)
<< 8;
renderTargetInfo.writeAddress = renderTargetInfo.readAddress;
renderTargetInfo.extent.width = vkViewPortScissor.extent.width;
renderTargetInfo.extent.height = vkViewPortScissor.extent.height;
renderTargetInfo.extent.width =
vkViewPortScissor.offset.x + vkViewPortScissor.extent.width;
renderTargetInfo.extent.height =
vkViewPortScissor.offset.y + vkViewPortScissor.extent.height;
renderTargetInfo.extent.depth = 1;
renderTargetInfo.dfmt = cbColor.info.dfmt;
renderTargetInfo.nfmt =
@ -273,7 +277,8 @@ void amdgpu::draw(GraphicsPipe &pipe, int vmId, std::uint32_t firstVertex,
if (pipe.uConfig.vgtPrimitiveType == gnm::PrimitiveType::None) {
if (cbColor.info.fastClear) {
auto image = cacheTag.getImage(renderTargetInfo, access);
auto image =
cacheTag.getImage(ImageKey::createFrom(renderTargetInfo), access);
VkClearColorValue clearValue = {
.uint32 =
{
@ -618,7 +623,7 @@ void amdgpu::flip(Cache::Tag &cacheTag, VkExtent2D targetExtent,
std::uint64_t address, VkImageView target,
VkExtent2D imageExtent, FlipType type, TileMode tileMode,
gnm::DataFormat dfmt, gnm::NumericFormat nfmt) {
ImageKey framebuffer{};
ImageViewKey framebuffer{};
framebuffer.readAddress = address;
framebuffer.type = gnm::TextureType::Dim2D;
framebuffer.dfmt = dfmt;
@ -687,8 +692,8 @@ void amdgpu::flip(Cache::Tag &cacheTag, VkExtent2D targetExtent,
cacheTag.getDevice()->flipPipeline.bind(cacheTag.getScheduler(), type,
imageView.handle, sampler.handle);
vkCmdSetViewportWithCount(commandBuffer, 1, &viewPort);
vkCmdSetScissorWithCount(commandBuffer, 1, viewPortScissors);
vkCmdSetViewport(commandBuffer, 0, 1, &viewPort);
vkCmdSetScissor(commandBuffer, 0, 1, viewPortScissors);
vkCmdDraw(commandBuffer, 6, 1, 0, 0);
vkCmdEndRendering(commandBuffer);

View file

@ -63,6 +63,23 @@ DEFINE_SIZEOF(float64_t, 8);
uint thread_id;
uint64_t exec;
// Resolves one destination-select code against `comp`:
//   0    -> constant 0
//   4..7 -> comp.x, comp.y, comp.z, comp.w
//   any other value (including 1) -> constant 1
float32_t swizzle(f32vec4 comp, int sel) {
  if (sel == 0) {
    return 0;
  }

  // Selectors 4..7 address the four source components in order.
  if (sel >= 4 && sel <= 7) {
    return comp[sel - 4];
  }

  return 1;
}
// Applies the scalar swizzle() to each destination channel, using one
// selector per output component.
f32vec4 swizzle(f32vec4 comp, int selX, int selY, int selZ, int selW) {
  f32vec4 picked;
  picked.x = swizzle(comp, selX);
  picked.y = swizzle(comp, selY);
  picked.z = swizzle(comp, selZ);
  picked.w = swizzle(comp, selW);
  return picked;
}
// Sign-extends the low `bits` bits of `x` to a full 32-bit integer.
// A width of 32 returns `x` unchanged, which also avoids a shift by 0 bits.
int32_t sext(int32_t x, uint bits) {
  if (bits == 32) {
    return x;
  }

  // Move the field's top bit into the sign position, then shift back
  // arithmetically so that bit is replicated into the upper bits.
  uint unusedBits = 32 - bits;
  return (x << unusedBits) >> unusedBits;
}
@ -2164,12 +2181,12 @@ uint32_t[16] s_load_dwordx16(int32_t memoryLocationHint, uint64_t sbase, int32_t
}
#define S_BUFFER_LOAD_DWORD(dest, memoryLocationHint, vbuffer, offset, N) \
uint64_t base_address = vbuffer_base(vbuffer) & ~0x3; \
uint64_t base_address = vbuffer_base(vbuffer) & ~0x3ul; \
uint64_t stride = vbuffer_stride(vbuffer); \
uint64_t num_records = vbuffer_num_records(vbuffer); \
uint64_t size = (stride == 0 ? 1 : stride) * num_records; \
uint64_t deviceAreaSize = 0; \
uint64_t deviceAddress = findMemoryAddress(base_address + offset, size, memoryLocationHint, deviceAreaSize); \
uint64_t deviceAddress = findMemoryAddress(base_address + (offset & ~0x3ul), size, memoryLocationHint, deviceAreaSize); \
int32_t _offset = 0; \
for (int i = 0; i < N; i++) { \
if (deviceAddress == kInvalidAddress || _offset + SIZEOF(uint32_t) > deviceAreaSize) { \
@ -2563,6 +2580,55 @@ uint8_t ssampler_border_color_type(u32vec4 ssampler) {
return uint8_t(U32ARRAY_FETCH_BITS(ssampler, 126, 2));
}
uint64_t image_memory_table;
// Searches the image memory table for a slot overlapping
// [address, address + size) and translates `address` into that slot's device
// address space.
//
//   address  - start of the range to translate.
//   size     - length of the range; only used for the overlap test.
//   hint     - index of the slot to probe first; a value outside
//              [0, slot count) falls back to the middle of the table.
//   areaSize - receives the size of the matching slot that remains past
//              `address`, or 0 when no slot matches.
//
// Returns the translated device address, or kInvalidAddress when no slot
// overlaps the range.
//
// The search narrows [begin, end) by halving, so it presumably relies on the
// slots being sorted by address and non-overlapping — confirm against the code
// that fills the table.
uint64_t findImageMemoryAddress(uint64_t address, uint64_t size, int32_t hint, out uint64_t areaSize) {
  // An `out` parameter that is never written is undefined in the caller after
  // the call, so give it a defined value for the not-found path.
  areaSize = 0;

  MemoryTable mt = MemoryTable(image_memory_table);

  uint32_t pivot;
  uint32_t slotCount = mt.count;
  if (hint < 0 || hint >= slotCount) {
    pivot = slotCount / 2;
  } else {
    pivot = uint32_t(hint);
  }

  uint32_t begin = 0;
  uint32_t end = slotCount;

  while (begin < end) {
    MemoryTableSlot slot = mt.slots[pivot];
    uint64_t slotSize = getSlotSize(slot);

    if (slot.address >= address + size) {
      // Slot lies entirely above the requested range.
      end = pivot;
    } else if (address >= slot.address + slotSize) {
      // Slot lies entirely below the requested range.
      begin = pivot + 1;
    } else {
      // NOTE(review): this branch is also reached when `address` is below
      // slot.address but the range still overlaps the slot; `offset` then
      // wraps around. Confirm callers never pass such a range.
      uint64_t offset = address - slot.address;
      areaSize = slotSize - offset;
      return slot.deviceAddress + offset;
    }

    pivot = begin + ((end - begin) / 2);
  }

  return kInvalidAddress;
}
// Returns the index to use for a sampler lookup. The hint is returned
// unchanged; the sampler descriptor `ssampler` is not inspected.
int findSamplerIndex(int32_t samplerIndexHint, u32vec4 ssampler) {
return samplerIndexHint;
}
// Returns the index to use for a 1D texture lookup. The hint is returned
// unchanged; the texture descriptor `tbuffer` is not inspected.
int findTexture1DIndex(int32_t textureIndexHint, uint32_t tbuffer[8]) {
return textureIndexHint;
}
// Returns the index to use for a 2D texture lookup. The hint is returned
// unchanged; the texture descriptor `tbuffer` is not inspected.
int findTexture2DIndex(int32_t textureIndexHint, uint32_t tbuffer[8]) {
return textureIndexHint;
}
// Returns the index to use for a 3D texture lookup. The hint is returned
// unchanged; the texture descriptor `tbuffer` is not inspected.
int findTexture3DIndex(int32_t textureIndexHint, uint32_t tbuffer[8]) {
return textureIndexHint;
}
// void image_gather4(inout u32vec4 vdata, u32vec4 vaddr, int32_t textureIndexHint, uint32_t tbuffer[8], int32_t samplerIndexHint, u32vec4 samplerDescriptor) {}
// image_gather4_cl
// image_gather4_l
@ -2588,37 +2654,6 @@ uint8_t ssampler_border_color_type(u32vec4 ssampler) {
// image_gather4_c_b_cl_o
// image_gather4_c_lz_o
// Returns the index to use for a sampler lookup. The hint is returned
// unchanged; the sampler descriptor `ssampler` is not inspected.
int findSamplerIndex(int32_t samplerIndexHint, u32vec4 ssampler) {
return samplerIndexHint;
}
// Returns the index to use for a 1D texture lookup. The hint is returned
// unchanged; the texture descriptor `tbuffer` is not inspected.
int findTexture1DIndex(int32_t textureIndexHint, uint32_t tbuffer[8]) {
return textureIndexHint;
}
// Returns the index to use for a 2D texture lookup. The hint is returned
// unchanged; the texture descriptor `tbuffer` is not inspected.
int findTexture2DIndex(int32_t textureIndexHint, uint32_t tbuffer[8]) {
return textureIndexHint;
}
// Returns the index to use for a 3D texture lookup. The hint is returned
// unchanged; the texture descriptor `tbuffer` is not inspected.
int findTexture3DIndex(int32_t textureIndexHint, uint32_t tbuffer[8]) {
return textureIndexHint;
}
// Resolves one destination-select code against `comp`:
//   0    -> constant 0
//   4..7 -> comp.x, comp.y, comp.z, comp.w
//   any other value (including 1) -> constant 1
float32_t swizzle(f32vec4 comp, int sel) {
  if (sel == 0) {
    return 0;
  }

  // Selectors 4..7 address the four source components in order.
  if (sel >= 4 && sel <= 7) {
    return comp[sel - 4];
  }

  return 1;
}
// Applies the scalar swizzle() to each destination channel, using one
// selector per output component.
f32vec4 swizzle(f32vec4 comp, int selX, int selY, int selZ, int selW) {
  f32vec4 picked;
  picked.x = swizzle(comp, selX);
  picked.y = swizzle(comp, selY);
  picked.z = swizzle(comp, selZ);
  picked.w = swizzle(comp, selW);
  return picked;
}
// void image_atomic_add() {
// // imageAtomicAdd
// }
@ -2676,13 +2711,6 @@ void image_load(inout f32vec4 vdata, i32vec3 vaddr, int32_t textureIndexHint, ui
return;
}
result = swizzle(result,
tbuffer_dst_sel_x(tbuffer),
tbuffer_dst_sel_y(tbuffer),
tbuffer_dst_sel_z(tbuffer),
tbuffer_dst_sel_w(tbuffer));
int vdataIndex = 0;
for (int i = 0; i < 4; ++i) {
if ((dmask & (1 << i)) != 0) {
@ -2721,13 +2749,6 @@ void image_load_mip(inout f32vec4 vdata, u32vec4 vaddr_u, int32_t textureIndexHi
return;
}
result = swizzle(result,
tbuffer_dst_sel_x(tbuffer),
tbuffer_dst_sel_y(tbuffer),
tbuffer_dst_sel_z(tbuffer),
tbuffer_dst_sel_w(tbuffer));
int vdataIndex = 0;
for (int i = 0; i < 4; ++i) {
if ((dmask & (1 << i)) != 0) {
@ -2782,13 +2803,6 @@ void image_sample(inout f32vec4 vdata, f32vec3 vaddr, int32_t textureIndexHint,
// debugPrintfEXT("image_sample: textureType: %u, coord: %v3f, result: %v4f, dmask: %u", textureType, vaddr, result, dmask);
result = swizzle(result,
tbuffer_dst_sel_x(tbuffer),
tbuffer_dst_sel_y(tbuffer),
tbuffer_dst_sel_z(tbuffer),
tbuffer_dst_sel_w(tbuffer));
int vdataIndex = 0;
for (int i = 0; i < 4; ++i) {
if ((dmask & (1 << i)) != 0) {
@ -2837,13 +2851,6 @@ void image_sample_l(inout f32vec4 vdata, f32vec4 vaddr, int32_t textureIndexHint
return;
}
result = swizzle(result,
tbuffer_dst_sel_x(tbuffer),
tbuffer_dst_sel_y(tbuffer),
tbuffer_dst_sel_z(tbuffer),
tbuffer_dst_sel_w(tbuffer));
int vdataIndex = 0;
for (int i = 0; i < 4; ++i) {
if ((dmask & (1 << i)) != 0) {
@ -2854,7 +2861,50 @@ void image_sample_l(inout f32vec4 vdata, f32vec4 vaddr, int32_t textureIndexHint
// image_sample_b
// image_sample_b_cl
// image_sample_lz
// image_sample_lz: samples the texture described by `tbuffer` at an explicit
// level of detail of 0 and stores the channels enabled in `dmask` into `vdata`.
//
// The texture type read from the descriptor decides which texture array
// (1D, 2D or 3D) is sampled and how many components of `vaddr` are used as
// coordinates. Cube, array and MSAA 2D types go through the 2D path with
// vaddr.xy. Any other texture type returns without touching `vdata`.
//
// Enabled channels are packed: the n-th set bit of `dmask` is written to
// vdata[n].
void image_sample_lz(inout f32vec4 vdata, f32vec3 vaddr, int32_t textureIndexHint, uint32_t tbuffer[8], int32_t samplerIndexHint, u32vec4 ssampler, uint32_t dmask) {
  // Every dimensionality uses the same sampler, so resolve it once.
  int samplerIndex = findSamplerIndex(samplerIndexHint, ssampler);
  uint kind = uint(tbuffer_type(tbuffer));
  f32vec4 texel;

  if (kind == kTextureType1D || kind == kTextureTypeArray1D) {
    texel = textureLod(
      sampler1D(
        textures1D[findTexture1DIndex(textureIndexHint, tbuffer)],
        samplers[samplerIndex]
      ), vaddr.x, 0);
  } else if (kind == kTextureType2D ||
             kind == kTextureTypeCube ||
             kind == kTextureTypeArray2D ||
             kind == kTextureTypeMsaa2D ||
             kind == kTextureTypeMsaaArray2D) {
    texel = textureLod(
      sampler2D(
        textures2D[findTexture2DIndex(textureIndexHint, tbuffer)],
        samplers[samplerIndex]
      ), vaddr.xy, 0);
  } else if (kind == kTextureType3D) {
    texel = textureLod(
      sampler3D(
        textures3D[findTexture3DIndex(textureIndexHint, tbuffer)],
        samplers[samplerIndex]
      ), vaddr.xyz, 0);
  } else {
    return;
  }

  int outSlot = 0;
  for (int channel = 0; channel < 4; ++channel) {
    if ((dmask & (1 << channel)) == 0) {
      continue;
    }

    vdata[outSlot++] = texel[channel];
  }
}
// image_sample_c
// image_sample_c_cl
// image_sample_c_d