mirror of
https://github.com/RPCSX/rpcsx.git
synced 2026-02-25 00:56:33 +01:00
gpu: implement image_store, image_store_pck, image_store_mip, image_store_mip_pck
fix convert_to_format
This commit is contained in:
parent
0287d42aa5
commit
7d704c37d5
|
|
@ -200,6 +200,68 @@ void Cache::ShaderResources::loadResources(
|
|||
buffer.address());
|
||||
}
|
||||
|
||||
for (auto &imageBuffer : res.imageBuffers) {
|
||||
auto word0 = eval(imageBuffer.words[0]).zExtScalar();
|
||||
auto word1 = eval(imageBuffer.words[1]).zExtScalar();
|
||||
auto word2 = eval(imageBuffer.words[2]).zExtScalar();
|
||||
auto word3 = eval(imageBuffer.words[3]).zExtScalar();
|
||||
|
||||
if (!word0 || !word1 || !word2 || !word3) {
|
||||
res.dump();
|
||||
rx::die("failed to evaluate V#");
|
||||
}
|
||||
|
||||
gnm::TBuffer tbuffer{};
|
||||
std::memcpy(reinterpret_cast<std::uint32_t *>(&tbuffer), &*word0,
|
||||
sizeof(std::uint32_t));
|
||||
std::memcpy(reinterpret_cast<std::uint32_t *>(&tbuffer) + 1, &*word1,
|
||||
sizeof(std::uint32_t));
|
||||
std::memcpy(reinterpret_cast<std::uint32_t *>(&tbuffer) + 2, &*word2,
|
||||
sizeof(std::uint32_t));
|
||||
std::memcpy(reinterpret_cast<std::uint32_t *>(&tbuffer) + 3, &*word3,
|
||||
sizeof(std::uint32_t));
|
||||
|
||||
if (imageBuffer.words[4] != nullptr) {
|
||||
auto word4 = eval(imageBuffer.words[4]).zExtScalar();
|
||||
auto word5 = eval(imageBuffer.words[5]).zExtScalar();
|
||||
auto word6 = eval(imageBuffer.words[6]).zExtScalar();
|
||||
auto word7 = eval(imageBuffer.words[7]).zExtScalar();
|
||||
|
||||
if (!word4 || !word5 || !word6 || !word7) {
|
||||
res.dump();
|
||||
rx::die("failed to evaluate 256 bit T#");
|
||||
}
|
||||
|
||||
std::memcpy(reinterpret_cast<std::uint32_t *>(&tbuffer) + 4, &*word4,
|
||||
sizeof(std::uint32_t));
|
||||
std::memcpy(reinterpret_cast<std::uint32_t *>(&tbuffer) + 5, &*word5,
|
||||
sizeof(std::uint32_t));
|
||||
std::memcpy(reinterpret_cast<std::uint32_t *>(&tbuffer) + 6, &*word6,
|
||||
sizeof(std::uint32_t));
|
||||
std::memcpy(reinterpret_cast<std::uint32_t *>(&tbuffer) + 7, &*word7,
|
||||
sizeof(std::uint32_t));
|
||||
}
|
||||
|
||||
auto info = computeSurfaceInfo(
|
||||
getDefaultTileModes()[tbuffer.tiling_idx], tbuffer.type, tbuffer.dfmt,
|
||||
tbuffer.width + 1, tbuffer.height + 1, tbuffer.depth + 1,
|
||||
tbuffer.pitch + 1, 0, tbuffer.last_array + 1, 0, tbuffer.last_level + 1,
|
||||
tbuffer.pow2pad != 0);
|
||||
|
||||
if (auto it = imageMemoryTable.queryArea(tbuffer.address());
|
||||
it != imageMemoryTable.end() &&
|
||||
it.beginAddress() == tbuffer.address() &&
|
||||
it.size() == info.totalTiledSize) {
|
||||
it.get().second |= imageBuffer.access;
|
||||
} else {
|
||||
imageMemoryTable.map(
|
||||
tbuffer.address(), tbuffer.address() + info.totalTiledSize,
|
||||
{ImageBufferKey::createFrom(tbuffer), imageBuffer.access});
|
||||
}
|
||||
resourceSlotToAddress.emplace_back(slotOffset + imageBuffer.resourceSlot,
|
||||
tbuffer.address());
|
||||
}
|
||||
|
||||
for (auto &texture : res.textures) {
|
||||
auto word0 = eval(texture.words[0]).zExtScalar();
|
||||
auto word1 = eval(texture.words[1]).zExtScalar();
|
||||
|
|
@ -325,6 +387,28 @@ void Cache::ShaderResources::buildMemoryTable(MemoryTable &memoryTable) {
|
|||
}
|
||||
}
|
||||
}
|
||||
/// Rebuilds `memoryTable` from the image memory ranges collected so far.
///
/// One table slot is written per entry of `imageMemoryTable`: the guest
/// address range, the access flags stored in the entry's payload and the
/// device address of the image buffer obtained from `cacheTag`. Every
/// resource slot recorded in `resourceSlotToAddress` whose address lies inside
/// an entry's [begin, end) range is then pointed at that table slot through
/// `slotResources`.
void Cache::ShaderResources::buildImageMemoryTable(MemoryTable &memoryTable) {
  memoryTable.count = 0;

  for (const auto &entry : imageMemoryTable) {
    auto imageBuffer =
        cacheTag->getImageBuffer(entry.payload.first, entry.payload.second);

    // Remember which table slot this range occupies before filling it in.
    auto tableIndex = memoryTable.count++;
    memoryTable.slots[tableIndex] = {
        .address = entry.beginAddress,
        .size = rx::AddressRange::fromBeginEnd(entry.beginAddress,
                                               entry.endAddress)
                    .size(),
        .flags = static_cast<uint8_t>(entry.payload.second),
        .deviceAddress = imageBuffer.deviceAddress,
    };

    // NOTE(review): resourceSlotToAddress is not filtered by resource kind
    // here, so any recorded slot whose address falls in this range is
    // remapped — confirm that is intended for non-image resources.
    for (const auto &[resourceSlot, resourceAddress] : resourceSlotToAddress) {
      if (resourceAddress < entry.beginAddress ||
          resourceAddress >= entry.endAddress) {
        continue;
      }

      slotResources[resourceSlot] = tableIndex;
    }
  }
}
|
||||
|
||||
std::uint32_t Cache::ShaderResources::getResourceSlot(std::uint32_t id) {
|
||||
if (auto it = slotResources.find(id); it != slotResources.end()) {
|
||||
|
|
@ -386,6 +470,10 @@ Cache::ShaderResources::eval(ir::InstructionId instId,
|
|||
rx::die("resource depends on texture value");
|
||||
}
|
||||
|
||||
if (instId == ir::amdgpu::IMAGE_BUFFER) {
|
||||
rx::die("resource depends on image buffer value");
|
||||
}
|
||||
|
||||
if (instId == ir::amdgpu::SAMPLER) {
|
||||
rx::die("resource depends on sampler value");
|
||||
}
|
||||
|
|
@ -865,6 +953,7 @@ struct CachedImage : Cache::Entry {
|
|||
SurfaceInfo info;
|
||||
|
||||
bool expensive() {
|
||||
return false;
|
||||
if (kDisableCache) {
|
||||
return false;
|
||||
}
|
||||
|
|
@ -1139,7 +1228,7 @@ ImageBufferKey ImageBufferKey::createFrom(const ImageKey &imageKey) {
|
|||
}
|
||||
|
||||
SamplerKey SamplerKey::createFrom(const gnm::SSampler &sampler) {
|
||||
float lodBias = ((std::int16_t(sampler.lod_bias) << 2) >> 2) / float(256.f);
|
||||
float lodBias = sampler.lod_bias / 256.f;
|
||||
// FIXME: lodBias can be scaled by gnm::TBuffer
|
||||
|
||||
return {
|
||||
|
|
@ -1152,8 +1241,8 @@ SamplerKey SamplerKey::createFrom(const gnm::SSampler &sampler) {
|
|||
.mipLodBias = lodBias,
|
||||
.maxAnisotropy = 0, // max_aniso_ratio
|
||||
.compareOp = toVkCompareOp(sampler.depth_compare_func),
|
||||
.minLod = static_cast<float>(sampler.min_lod),
|
||||
.maxLod = static_cast<float>(sampler.max_lod),
|
||||
.minLod = sampler.min_lod / 256.f,
|
||||
.maxLod = sampler.max_lod / 256.f,
|
||||
.borderColor = toVkBorderColor(sampler.border_color_type),
|
||||
.anisotropyEnable = false,
|
||||
.compareEnable = sampler.depth_compare_func != gnm::CompareFunc::Never,
|
||||
|
|
@ -1334,12 +1423,9 @@ Cache::Buffer Cache::Tag::getBuffer(rx::AddressRange range, Access access) {
|
|||
auto it = table.queryArea(range.beginAddress());
|
||||
|
||||
if (it == table.end() || !it.range().contains(range)) {
|
||||
if (mParent->flushImages(*this, range)) {
|
||||
mScheduler->submit();
|
||||
mScheduler->wait();
|
||||
}
|
||||
|
||||
if (mParent->flushImageBuffers(*this, range)) {
|
||||
auto flushRange = mParent->flushImages(*this, range);
|
||||
flushRange = flushRange.merge(mParent->flushImageBuffers(*this, range));
|
||||
if (flushRange) {
|
||||
mScheduler->submit();
|
||||
mScheduler->wait();
|
||||
}
|
||||
|
|
@ -1375,18 +1461,18 @@ Cache::Buffer Cache::Tag::getBuffer(rx::AddressRange range, Access access) {
|
|||
addressRange.beginAddress(),
|
||||
addressRange.size()) ||
|
||||
!mParent->isInSync(addressRange, cached->tagId)) {
|
||||
if (mParent->flushImages(*this, range)) {
|
||||
auto flushedRange = mParent->flushImages(*this, range);
|
||||
flushedRange =
|
||||
flushedRange.merge(mParent->flushImageBuffers(*this, range));
|
||||
|
||||
if (flushedRange) {
|
||||
getScheduler().submit();
|
||||
getScheduler().wait();
|
||||
}
|
||||
|
||||
if (mParent->flushImageBuffers(*this, range)) {
|
||||
getScheduler().submit();
|
||||
getScheduler().wait();
|
||||
}
|
||||
|
||||
mParent->trackUpdate(EntryType::HostVisibleBuffer, addressRange, it.get(),
|
||||
getReadId(), cached->expensive());
|
||||
mParent->trackUpdate(
|
||||
EntryType::HostVisibleBuffer, addressRange, it.get(), getReadId(),
|
||||
(access & Access::Write) == Access::None && cached->expensive());
|
||||
amdgpu::RemoteMemory memory{mParent->mVmId};
|
||||
cached->update(addressRange,
|
||||
memory.getPointer(addressRange.beginAddress()));
|
||||
|
|
@ -1448,13 +1534,18 @@ Cache::Buffer Cache::Tag::getInternalDeviceLocalBuffer(std::uint64_t size) {
|
|||
}
|
||||
|
||||
void Cache::Tag::buildDescriptors(VkDescriptorSet descriptorSet) {
|
||||
auto &res = mStorage->shaderResources;
|
||||
auto memoryTableBuffer = getMemoryTable();
|
||||
auto imageMemoryTableBuffer = getImageMemoryTable();
|
||||
auto memoryTable = std::bit_cast<MemoryTable *>(memoryTableBuffer.data);
|
||||
mStorage->shaderResources.buildMemoryTable(*memoryTable);
|
||||
auto imageMemoryTable =
|
||||
std::bit_cast<MemoryTable *>(imageMemoryTableBuffer.data);
|
||||
|
||||
for (auto &sampler : mStorage->shaderResources.samplerResources) {
|
||||
uint32_t index =
|
||||
&sampler - mStorage->shaderResources.samplerResources.data();
|
||||
res.buildMemoryTable(*memoryTable);
|
||||
res.buildImageMemoryTable(*imageMemoryTable);
|
||||
|
||||
for (auto &sampler : res.samplerResources) {
|
||||
uint32_t index = &sampler - res.samplerResources.data();
|
||||
|
||||
VkDescriptorImageInfo samplerInfo{.sampler = sampler.handle};
|
||||
|
||||
|
|
@ -1471,8 +1562,8 @@ void Cache::Tag::buildDescriptors(VkDescriptorSet descriptorSet) {
|
|||
vkUpdateDescriptorSets(vk::context->device, 1, &writeDescSet, 0, nullptr);
|
||||
}
|
||||
|
||||
for (auto &imageResources : mStorage->shaderResources.imageResources) {
|
||||
auto dim = (&imageResources - mStorage->shaderResources.imageResources) + 1;
|
||||
for (auto &imageResources : res.imageResources) {
|
||||
auto dim = (&imageResources - res.imageResources) + 1;
|
||||
auto binding = static_cast<uint32_t>(
|
||||
Cache::getDescriptorBinding(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, dim));
|
||||
|
||||
|
|
@ -1725,8 +1816,9 @@ Cache::ImageBuffer Cache::Tag::getImageBuffer(const ImageBufferKey &key,
|
|||
|
||||
auto tiledBuffer = getBuffer(range, Access::Read);
|
||||
if (tiledBuffer.tagId != cached->tagId) {
|
||||
mParent->trackUpdate(EntryType::ImageBuffer, range, it.get(),
|
||||
tiledBuffer.tagId, cached->expensive());
|
||||
mParent->trackUpdate(
|
||||
EntryType::ImageBuffer, range, it.get(), tiledBuffer.tagId,
|
||||
(access & Access::Write) == Access::None && cached->expensive());
|
||||
|
||||
cached->update(this, cached->addressRange, tiledBuffer);
|
||||
}
|
||||
|
|
@ -1881,8 +1973,9 @@ Cache::Image Cache::Tag::getImage(const ImageKey &key, Access access) {
|
|||
imageBufferKey.address = key.readAddress;
|
||||
auto imageBuffer = getImageBuffer(imageBufferKey, Access::Read);
|
||||
if (imageBuffer.tagId != cached->tagId) {
|
||||
mParent->trackUpdate(EntryType::Image, storeRange, it.get(),
|
||||
imageBuffer.tagId, cached->expensive());
|
||||
mParent->trackUpdate(
|
||||
EntryType::Image, storeRange, it.get(), imageBuffer.tagId,
|
||||
(access & Access::Write) == Access::None && cached->expensive());
|
||||
|
||||
cached->update(this, cached->addressRange, imageBuffer);
|
||||
}
|
||||
|
|
@ -1992,6 +2085,11 @@ void Cache::Tag::release() {
|
|||
mAcquiredMemoryTable = -1;
|
||||
}
|
||||
|
||||
if (mAcquiredImageMemoryTable + 1 != 0) {
|
||||
getCache()->mMemoryTablePool.release(mAcquiredImageMemoryTable);
|
||||
mAcquiredImageMemoryTable = -1;
|
||||
}
|
||||
|
||||
std::vector<std::shared_ptr<Entry>> tmpResources;
|
||||
bool hasSubmits = false;
|
||||
|
||||
|
|
@ -2141,6 +2239,7 @@ Cache::Shader Cache::GraphicsTag::getShader(
|
|||
}
|
||||
|
||||
std::uint64_t memoryTableAddress = getMemoryTable().deviceAddress;
|
||||
std::uint64_t imageMemoryTableAddress = getImageMemoryTable().deviceAddress;
|
||||
|
||||
std::uint64_t gdsAddress = mParent->getGdsBuffer().getAddress();
|
||||
mStorage->shaderResources.cacheTag = this;
|
||||
|
|
@ -2228,6 +2327,14 @@ Cache::Shader Cache::GraphicsTag::getShader(
|
|||
configPtr[index] = static_cast<std::uint32_t>(memoryTableAddress >> 32);
|
||||
}
|
||||
break;
|
||||
case gcn::ConfigType::ImageMemoryTable:
|
||||
if (slot.data == 0) {
|
||||
configPtr[index] = static_cast<std::uint32_t>(imageMemoryTableAddress);
|
||||
} else {
|
||||
configPtr[index] =
|
||||
static_cast<std::uint32_t>(imageMemoryTableAddress >> 32);
|
||||
}
|
||||
break;
|
||||
case gcn::ConfigType::Gds:
|
||||
if (slot.data == 0) {
|
||||
configPtr[index] = static_cast<std::uint32_t>(gdsAddress);
|
||||
|
|
@ -2295,6 +2402,7 @@ Cache::ComputeTag::getShader(const Registers::ComputeConfig &pgm) {
|
|||
}
|
||||
|
||||
std::uint64_t memoryTableAddress = getMemoryTable().deviceAddress;
|
||||
std::uint64_t imageMemoryTableAddress = getImageMemoryTable().deviceAddress;
|
||||
|
||||
std::uint64_t gdsAddress = mParent->getGdsBuffer().getAddress();
|
||||
mStorage->shaderResources.cacheTag = this;
|
||||
|
|
@ -2365,6 +2473,14 @@ Cache::ComputeTag::getShader(const Registers::ComputeConfig &pgm) {
|
|||
configPtr[index] = static_cast<std::uint32_t>(memoryTableAddress >> 32);
|
||||
}
|
||||
break;
|
||||
case gcn::ConfigType::ImageMemoryTable:
|
||||
if (slot.data == 0) {
|
||||
configPtr[index] = static_cast<std::uint32_t>(imageMemoryTableAddress);
|
||||
} else {
|
||||
configPtr[index] =
|
||||
static_cast<std::uint32_t>(imageMemoryTableAddress >> 32);
|
||||
}
|
||||
break;
|
||||
case gcn::ConfigType::Gds:
|
||||
if (slot.data == 0) {
|
||||
configPtr[index] = static_cast<std::uint32_t>(gdsAddress);
|
||||
|
|
@ -2585,12 +2701,10 @@ void Cache::invalidate(Tag &tag, rx::AddressRange range) {
|
|||
markHostInvalidated(mDevice, mVmId, range.beginAddress(), range.size());
|
||||
}
|
||||
void Cache::flush(Tag &tag, rx::AddressRange range) {
|
||||
if (flushImages(tag, range)) {
|
||||
tag.getScheduler().submit();
|
||||
tag.getScheduler().wait();
|
||||
}
|
||||
auto flushedRange = flushImages(tag, range);
|
||||
flushedRange = flushedRange.merge(flushImageBuffers(tag, range));
|
||||
|
||||
if (flushImageBuffers(tag, range)) {
|
||||
if (flushedRange) {
|
||||
tag.getScheduler().submit();
|
||||
tag.getScheduler().wait();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -87,6 +87,8 @@ struct ImageBufferKey {
|
|||
|
||||
static ImageBufferKey createFrom(const gnm::TBuffer &tbuffer);
|
||||
static ImageBufferKey createFrom(const ImageKey &imageKey);
|
||||
|
||||
constexpr auto operator<=>(const ImageBufferKey &) const = default;
|
||||
};
|
||||
|
||||
struct SamplerKey {
|
||||
|
|
@ -244,6 +246,7 @@ private:
|
|||
|
||||
std::uint32_t slotOffset = 0;
|
||||
rx::MemoryTableWithPayload<Access> bufferMemoryTable;
|
||||
rx::MemoryTableWithPayload<std::pair<ImageBufferKey, Access>> imageMemoryTable;
|
||||
std::vector<std::pair<std::uint32_t, std::uint64_t>> resourceSlotToAddress;
|
||||
std::vector<Cache::Sampler> samplerResources;
|
||||
std::vector<Cache::ImageView> imageResources[3];
|
||||
|
|
@ -256,6 +259,7 @@ private:
|
|||
cacheTag = nullptr;
|
||||
slotOffset = 0;
|
||||
bufferMemoryTable.clear();
|
||||
imageMemoryTable.clear();
|
||||
resourceSlotToAddress.clear();
|
||||
samplerResources.clear();
|
||||
for (auto &res : imageResources) {
|
||||
|
|
@ -268,6 +272,7 @@ private:
|
|||
void loadResources(shader::gcn::Resources &res,
|
||||
std::span<const std::uint32_t> userSgprs);
|
||||
void buildMemoryTable(MemoryTable &memoryTable);
|
||||
void buildImageMemoryTable(MemoryTable &memoryTable);
|
||||
std::uint32_t getResourceSlot(std::uint32_t id);
|
||||
|
||||
template <typename T> T readPointer(std::uint64_t address) {
|
||||
|
|
@ -317,6 +322,7 @@ private:
|
|||
std::unique_lock<std::mutex> mResourcesLock;
|
||||
TagId mTagId{};
|
||||
std::uint32_t mAcquiredMemoryTable = -1;
|
||||
std::uint32_t mAcquiredImageMemoryTable = -1;
|
||||
};
|
||||
|
||||
public:
|
||||
|
|
@ -392,6 +398,24 @@ public:
|
|||
return result;
|
||||
}
|
||||
|
||||
/// Returns the image memory table assigned to this tag.
///
/// A slot is acquired from `mParent->mMemoryTablePool` the first time this is
/// called (while `mAcquiredImageMemoryTable` still holds its -1 sentinel).
/// The result describes that slot's region of `mParent->mMemoryTableBuffer`:
/// byte offset, device address, host pointer and the current read id.
Buffer getImageMemoryTable() {
  // All bits set (-1) means no slot has been acquired yet.
  const bool needsSlot =
      mAcquiredImageMemoryTable == static_cast<std::uint32_t>(-1);
  if (needsSlot) {
    mAcquiredImageMemoryTable = mParent->mMemoryTablePool.acquire();
  }

  auto &tableStorage = mParent->mMemoryTableBuffer;
  auto byteOffset = mAcquiredImageMemoryTable * kMemoryTableSize;

  return Buffer{
      .offset = byteOffset,
      .deviceAddress = tableStorage.getAddress() + byteOffset,
      .tagId = getReadId(),
      .data = tableStorage.getData() + byteOffset,
  };
}
|
||||
|
||||
std::shared_ptr<Entry> findShader(const ShaderKey &key,
|
||||
const ShaderKey *dependedKey = nullptr);
|
||||
friend Cache;
|
||||
|
|
|
|||
|
|
@ -224,20 +224,23 @@ Device::Device() : vkContext(createVkContext(this)) {
|
|||
rx::AddressRange::fromBeginSize(address, rx::mem::pageSize);
|
||||
auto tag = getCacheTag(vmId, sched);
|
||||
|
||||
if (tag.getCache()->flushImages(tag, range)) {
|
||||
auto flushedRange = tag.getCache()->flushImages(tag, range);
|
||||
flushedRange =
|
||||
flushedRange.merge(tag.getCache()->flushImageBuffers(tag, range));
|
||||
|
||||
if (flushedRange) {
|
||||
sched.submit();
|
||||
sched.wait();
|
||||
}
|
||||
|
||||
if (tag.getCache()->flushImageBuffers(tag, range)) {
|
||||
sched.submit();
|
||||
sched.wait();
|
||||
flushedRange = tag.getCache()->flushBuffers(flushedRange);
|
||||
|
||||
if (flushedRange) {
|
||||
unlockReadWrite(vmId, flushedRange.beginAddress(),
|
||||
flushedRange.size());
|
||||
} else {
|
||||
unlockReadWrite(vmId, range.beginAddress(), range.size());
|
||||
}
|
||||
|
||||
auto flushedRange = tag.getCache()->flushBuffers(range);
|
||||
|
||||
assert(flushedRange.isValid() && flushedRange.size() > 0);
|
||||
unlockReadWrite(vmId, flushedRange.beginAddress(), flushedRange.size());
|
||||
}
|
||||
}
|
||||
});
|
||||
|
|
@ -915,6 +918,23 @@ void Device::waitForIdle() {
|
|||
}
|
||||
}
|
||||
|
||||
{
|
||||
auto &queue = graphicsPipes[0].ceQueue;
|
||||
if (queue.wptr != queue.rptr) {
|
||||
allProcessed = false;
|
||||
}
|
||||
}
|
||||
|
||||
for (auto &pipe : computePipes) {
|
||||
for (auto &queue : pipe.queues) {
|
||||
for (auto &ring : queue) {
|
||||
if (ring.wptr != ring.rptr) {
|
||||
allProcessed = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (allProcessed) {
|
||||
break;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -138,6 +138,9 @@ struct TileMode {
|
|||
((static_cast<std::uint32_t>(mode) << 25) & 0x06000000);
|
||||
return *this;
|
||||
}
|
||||
|
||||
constexpr auto operator<=>(const TileMode &other) const { return raw <=> other.raw; }
|
||||
constexpr bool operator==(const TileMode &other) const { return raw == other.raw; }
|
||||
};
|
||||
|
||||
struct MacroTileMode {
|
||||
|
|
|
|||
|
|
@ -104,6 +104,7 @@ enum class ConfigType {
|
|||
UserSgpr,
|
||||
ResourceSlot,
|
||||
MemoryTable,
|
||||
ImageMemoryTable,
|
||||
Gds,
|
||||
PsInputVGpr,
|
||||
VsInputSGpr,
|
||||
|
|
@ -156,6 +157,11 @@ struct Resources {
|
|||
ir::Value words[4];
|
||||
};
|
||||
|
||||
struct ImageBuffer : Resource {
|
||||
Access access;
|
||||
ir::Value words[8];
|
||||
};
|
||||
|
||||
struct Sampler : Resource {
|
||||
bool unorm;
|
||||
ir::Value words[4];
|
||||
|
|
@ -165,8 +171,9 @@ struct Resources {
|
|||
bool hasUnknown = false;
|
||||
std::uint32_t slots = 0;
|
||||
std::vector<Pointer> pointers;
|
||||
std::vector<Texture> textures;
|
||||
std::vector<Buffer> buffers;
|
||||
std::vector<Texture> textures;
|
||||
std::vector<ImageBuffer> imageBuffers;
|
||||
std::vector<Sampler> samplers;
|
||||
|
||||
void print(std::ostream &os, ir::NameStorage &ns) const;
|
||||
|
|
|
|||
|
|
@ -29,6 +29,8 @@ struct GcnOperand {
|
|||
Buffer,
|
||||
Texture128,
|
||||
Texture256,
|
||||
ImageBuffer128,
|
||||
ImageBuffer256,
|
||||
Sampler,
|
||||
Pointer,
|
||||
};
|
||||
|
|
@ -137,6 +139,13 @@ struct GcnOperand {
|
|||
.kind = (is128 ? Kind::Texture128 : Kind::Texture256),
|
||||
};
|
||||
}
|
||||
/// Creates an image-buffer operand whose descriptor starts at `firstReg`.
///
/// @param firstReg operand naming the first register of the descriptor; its
///                 kind and value become firstRegisterKind/firstRegisterIndex.
/// @param is128    selects Kind::ImageBuffer128 when true, otherwise
///                 Kind::ImageBuffer256.
static constexpr GcnOperand createImageBuffer(GcnOperand firstReg, bool is128) {
  const Kind descriptorKind =
      is128 ? Kind::ImageBuffer128 : Kind::ImageBuffer256;

  return {
      .firstRegisterKind = firstReg.kind,
      .firstRegisterIndex = static_cast<std::uint8_t>(firstReg.value),
      .kind = descriptorKind,
  };
}
|
||||
static constexpr GcnOperand createBuffer(GcnOperand firstReg) {
|
||||
return {
|
||||
.firstRegisterKind = firstReg.kind,
|
||||
|
|
|
|||
|
|
@ -10,6 +10,7 @@ enum Op {
|
|||
VBUFFER,
|
||||
SAMPLER,
|
||||
TBUFFER,
|
||||
IMAGE_BUFFER,
|
||||
POINTER,
|
||||
OMOD,
|
||||
NEG_ABS,
|
||||
|
|
@ -40,6 +41,8 @@ inline const char *getInstructionName(unsigned op) {
|
|||
return "sampler";
|
||||
case TBUFFER:
|
||||
return "tbuffer";
|
||||
case IMAGE_BUFFER:
|
||||
return "image_buffer";
|
||||
case POINTER:
|
||||
return "pointer";
|
||||
case OMOD:
|
||||
|
|
|
|||
|
|
@ -41,6 +41,7 @@ enum RegId {
|
|||
VgprCount,
|
||||
ThreadId,
|
||||
MemoryTable,
|
||||
ImageMemoryTable,
|
||||
Gds,
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -63,7 +63,7 @@ DEFINE_SIZEOF(float64_t, 8);
|
|||
uint thread_id;
|
||||
uint64_t exec;
|
||||
|
||||
float32_t swizzle(f32vec4 comp, int sel) {
|
||||
uint32_t swizzle(u32vec4 comp, int sel) {
|
||||
switch (sel) {
|
||||
case 0: return 0;
|
||||
case 1: return 1;
|
||||
|
|
@ -76,8 +76,8 @@ float32_t swizzle(f32vec4 comp, int sel) {
|
|||
return 1;
|
||||
}
|
||||
|
||||
f32vec4 swizzle(f32vec4 comp, int selX, int selY, int selZ, int selW) {
|
||||
return f32vec4(swizzle(comp, selX), swizzle(comp, selY), swizzle(comp, selZ), swizzle(comp, selW));
|
||||
u32vec4 swizzle(u32vec4 comp, int selX, int selY, int selZ, int selW) {
|
||||
return u32vec4(swizzle(comp, selX), swizzle(comp, selY), swizzle(comp, selZ), swizzle(comp, selW));
|
||||
}
|
||||
|
||||
int32_t sext(int32_t x, uint bits) {
|
||||
|
|
@ -247,10 +247,13 @@ float32_t ps_input_vgpr(int32_t index, f32vec4 fragCoord, bool frontFace) {
|
|||
case kPsVGprInputFrontFace:
|
||||
return intBitsToFloat(frontFace ? 1 : 0);
|
||||
case kPsVGprInputAncillary:
|
||||
debugPrintfEXT("ps_input_vgpr: kPsVGprInputAncillary");
|
||||
return 0;
|
||||
case kPsVGprInputSampleCoverage:
|
||||
debugPrintfEXT("ps_input_vgpr: kPsVGprInputSampleCoverage");
|
||||
return 0;
|
||||
case kPsVGprInputPosFixed:
|
||||
debugPrintfEXT("ps_input_vgpr: kPsVGprInputPosFixed");
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
@ -1389,6 +1392,11 @@ const int kBufferChannelTypeUInt = 0x00000004;
|
|||
const int kBufferChannelTypeSInt = 0x00000005;
|
||||
const int kBufferChannelTypeSNormNoZero = 0x00000006;
|
||||
const int kBufferChannelTypeFloat = 0x00000007;
|
||||
const int kNumericFormatSrgb = 0x00000009;
|
||||
const int kNumericFormatUBNorm = 0x0000000A;
|
||||
const int kNumericFormatUBNormNoZero = 0x0000000B;
|
||||
const int kNumericFormatUBInt = 0x0000000C;
|
||||
const int kNumericFormatUBScaled = 0x0000000D;
|
||||
|
||||
uint64_t compute_vbuffer_address(uint size, u32vec4 vbuffer, uint64_t soff, uint64_t OFFSET, bool IDXEN, uint64_t vINDEX, uint64_t vOFFSET) {
|
||||
bool addTid = vbuffer_addtid_en(vbuffer);
|
||||
|
|
@ -1594,8 +1602,10 @@ uint32_t convert_from_nfmt(uint32_t data, uint bits, uint nfmt) {
|
|||
data = zext(data, bits);
|
||||
|
||||
switch (nfmt) {
|
||||
case kNumericFormatSrgb:
|
||||
case kBufferChannelTypeUNorm:
|
||||
return floatBitsToUint(float(uint(data)) / ((1 << bits) - 1));
|
||||
uint32_t result = floatBitsToUint(float(uint(data)) / ((1 << bits) - 1));
|
||||
return result;
|
||||
|
||||
case kBufferChannelTypeSNorm:
|
||||
return floatBitsToUint(float(sext(int(data), bits)) / ((1 << (bits - 1)) - 1));
|
||||
|
|
@ -1607,7 +1617,7 @@ uint32_t convert_from_nfmt(uint32_t data, uint bits, uint nfmt) {
|
|||
return floatBitsToUint(float(sext(int(data), bits)));
|
||||
|
||||
case kBufferChannelTypeUInt:
|
||||
return data;
|
||||
return floatBitsToUint(float(data));
|
||||
|
||||
case kBufferChannelTypeSInt:
|
||||
return uint32_t(sext(int(data), bits));
|
||||
|
|
@ -1617,38 +1627,52 @@ uint32_t convert_from_nfmt(uint32_t data, uint bits, uint nfmt) {
|
|||
|
||||
case kBufferChannelTypeFloat:
|
||||
return data;
|
||||
|
||||
default:
|
||||
debugPrintfEXT("convert_from_nfmt: unexpected nfmt %x", nfmt);
|
||||
break;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
uint32_t convert_to_nfmt(uint32_t data, uint bits, uint nfmt) {
|
||||
data = zext(data, bits);
|
||||
|
||||
uint32_t result = 0;
|
||||
switch (nfmt) {
|
||||
case kBufferChannelTypeUNorm:
|
||||
return uint32_t(clamp(uintBitsToFloat(data), 0, 1) * ((1 << bits) - 1));
|
||||
result = uint32_t(clamp(uintBitsToFloat(data), 0, 1) * ((1 << bits) - 1));
|
||||
break;
|
||||
|
||||
case kBufferChannelTypeSNorm:
|
||||
return uint32_t(clamp(uintBitsToFloat(data), -1, 1) * ((1 << (bits - 1)) - 1));
|
||||
result = uint32_t(clamp(uintBitsToFloat(data), -1, 1) * ((1 << (bits - 1)) - 1));
|
||||
break;
|
||||
|
||||
case kBufferChannelTypeUScaled:
|
||||
return uint32_t(uintBitsToFloat(data));
|
||||
result = uint32_t(uintBitsToFloat(data));
|
||||
break;
|
||||
|
||||
case kBufferChannelTypeUInt:
|
||||
return data;
|
||||
result = uint32_t(uintBitsToFloat(data));
|
||||
break;
|
||||
|
||||
case kBufferChannelTypeSInt:
|
||||
return uint32_t(sext(int32_t(data), bits));
|
||||
result = uint32_t(sext(int32_t(uintBitsToFloat(data)), bits));
|
||||
break;
|
||||
|
||||
case kBufferChannelTypeSNormNoZero:
|
||||
return uint32_t(clamp(uintBitsToFloat(data), -1, 1) * ((1 << bits) - 1) / 2 - 1);
|
||||
result = uint32_t(clamp(uintBitsToFloat(data), -1, 1) * ((1 << bits) - 1) / 2 - 1);
|
||||
break;
|
||||
|
||||
case kBufferChannelTypeFloat:
|
||||
return data;
|
||||
result = data;
|
||||
break;
|
||||
|
||||
default:
|
||||
debugPrintfEXT("convert_to_nfmt: unexpected nfmt %x", nfmt);
|
||||
break;
|
||||
}
|
||||
|
||||
return 0;
|
||||
return zext(result, bits);
|
||||
}
|
||||
|
||||
uint32_t convert_from_format_x(uint32_t data, uint dfmt, uint nfmt) {
|
||||
|
|
@ -1877,6 +1901,10 @@ uint32_t convert_to_format(uint element, u32vec4 data, uint dfmt, uint nfmt) {
|
|||
(convert_to_nfmt(data[2], 10, nfmt) << 20) |
|
||||
(convert_to_nfmt(data[3], 2, nfmt) << 30)
|
||||
);
|
||||
|
||||
default:
|
||||
debugPrintfEXT("convert_to_format: unexpected format");
|
||||
break;
|
||||
}
|
||||
|
||||
return uint32_t(0);
|
||||
|
|
@ -1938,6 +1966,7 @@ u32vec4 buffer_load_format(uint dfmt, uint nfmt, uint32_t vOFFSET, uint32_t vIND
|
|||
uint64_t address = compute_vbuffer_address(data_size, vbuffer, soff, OFFSET, IDXEN, vINDEX, vOFFSET);
|
||||
|
||||
if (address == 0 || dfmt == kBufferFormatInvalid) {
|
||||
debugPrintfEXT("buffer_load_format: invalid buffer");
|
||||
return u32vec4(0);
|
||||
}
|
||||
|
||||
|
|
@ -1945,6 +1974,7 @@ u32vec4 buffer_load_format(uint dfmt, uint nfmt, uint32_t vOFFSET, uint32_t vIND
|
|||
uint64_t deviceAddress = findMemoryAddress(address, data_size, memoryLocationHint, deviceAreaSize);
|
||||
|
||||
if (deviceAddress == kInvalidAddress || deviceAreaSize < data_size) {
|
||||
debugPrintfEXT("buffer_load_format: out of buffer memory");
|
||||
return u32vec4(0);
|
||||
}
|
||||
|
||||
|
|
@ -2189,7 +2219,8 @@ uint32_t[16] s_load_dwordx16(int32_t memoryLocationHint, uint64_t sbase, int32_t
|
|||
uint64_t deviceAddress = findMemoryAddress(base_address + (offset & ~0x3ul), size, memoryLocationHint, deviceAreaSize); \
|
||||
int32_t _offset = 0; \
|
||||
for (int i = 0; i < N; i++) { \
|
||||
if (deviceAddress == kInvalidAddress || _offset + SIZEOF(uint32_t) > deviceAreaSize) { \
|
||||
if (deviceAddress == kInvalidAddress || offset + _offset > size - SIZEOF(uint32_t) || _offset + SIZEOF(uint32_t) > deviceAreaSize) { \
|
||||
debugPrintfEXT("S_BUFFER_LOAD_DWORD: out of buffer memory"); \
|
||||
sdst[i] = 0; \
|
||||
} else { \
|
||||
sdst[i] = MEMORY_DATA_REF(uint32_t, deviceAddress + _offset); \
|
||||
|
|
@ -2210,6 +2241,10 @@ u32vec2 s_buffer_load_dwordx2(int32_t memoryLocationHint, u32vec4 vbuffer, int32
|
|||
u32vec4 s_buffer_load_dwordx4(int32_t memoryLocationHint, u32vec4 vbuffer, int32_t offset) {
|
||||
u32vec4 sdst;
|
||||
S_BUFFER_LOAD_DWORD(sdst, memoryLocationHint, vbuffer, offset, 4);
|
||||
if (offset == 48) {
|
||||
uint64_t base = vbuffer_base(vbuffer);
|
||||
debugPrintfEXT("s_buffer_load_dwordx4: %v4u, base=%lx", sdst, base);
|
||||
}
|
||||
return sdst;
|
||||
}
|
||||
uint32_t[8] s_buffer_load_dwordx8(int32_t memoryLocationHint, u32vec4 vbuffer, int32_t offset) {
|
||||
|
|
@ -2629,6 +2664,132 @@ int findTexture3DIndex(int32_t textureIndexHint, uint32_t tbuffer[8]) {
|
|||
return textureIndexHint;
|
||||
}
|
||||
|
||||
// Bit layout of an image-instruction flags word.
// NOTE(review): presumably consumed through the `flags` argument of the
// image_* helpers; the names follow the MIMG instruction fields — confirm.
const uint32_t kImageFlagDmask = 0x0000000f; // bits 0..3
const uint32_t kImageFlagR128  = 0x00000010; // bit 4
const uint32_t kImageFlagDA    = 0x00000020; // bit 5
const uint32_t kImageFlagUnorm = 0x00000040; // bit 6
const uint32_t kImageFlagTFE   = 0x00000080; // bit 7
|
||||
|
||||
// Layout of a single mip level of an image, as produced by getImageInfo().
struct ImageInfo {
    uint64_t offset;          // byte offset of the level from the image base
    uvec3 extent;             // width, height, depth of the level
    uint16_t pitch;           // row pitch of the level, in texels
    uint16_t baseArraySlice;  // base_array field of the descriptor
    uint16_t arraySliceCount; // slice count, limited to last_array - base_array + 1
    uint8_t baseMipLevel;     // base_level field of the descriptor
    uint8_t mipLevelCount;    // last_level - base_level + 1
    uint8_t type;             // texture type field of the descriptor
    uint8_t dataSize;         // size of one texel in bytes (size_of_format(dfmt))
};
|
||||
|
||||
// Derives the layout of mip level `mipLevel` of the image described by
// `tbuffer`.
//
// The returned offset is the sum of the sizes of all levels before
// `mipLevel`, each computed as sliceCount * texelSize * height * pitch * depth
// with the dimensions halved per level (never below 1). Extent and pitch are
// those of the requested level; the array/mip ranges, type and texel size
// come straight from the descriptor.
ImageInfo getImageInfo(uint32_t tbuffer[8], uint32_t mipLevel) {
    uint8_t type = tbuffer_type(tbuffer);
    // Descriptor dimensions are stored minus one.
    uint16_t width = uint16_t(tbuffer_width(tbuffer) + 1u);
    uint16_t height = uint16_t(tbuffer_height(tbuffer) + 1u);
    uint16_t depth = uint16_t(tbuffer_depth(tbuffer) + 1u);
    uint16_t pitch = uint16_t(tbuffer_pitch(tbuffer) + 1u);
    uint16_t baseArray = tbuffer_base_array(tbuffer);
    uint16_t lastArray = tbuffer_last_array(tbuffer);
    uint8_t baseLevel = tbuffer_base_level(tbuffer);
    uint8_t lastLevel = tbuffer_last_level(tbuffer);
    bool pow2pad = tbuffer_pow2pad(tbuffer);
    bool isVolume = type == kTextureType3D;
    bool isCubemap = type == kTextureTypeCube;

    // Only volume textures have a real depth.
    depth = isVolume ? depth : uint16_t(1);

    uint arraySliceCount = depth;

    if (isCubemap) {
        arraySliceCount *= 6;
    } else if (isVolume) {
        arraySliceCount = 1;
    }

    if (pow2pad) {
        // Padding to powers of two is not applied here; only report images
        // whose dimensions would actually be affected by it.
        if ((arraySliceCount & (arraySliceCount - 1)) != 0 ||
            (width & (width - 1)) != 0 ||
            (height & (height - 1)) != 0 ||
            (depth & (depth - 1)) != 0 ||
            (pitch & (pitch - 1)) != 0) {
            debugPrintfEXT("getImageInfo: pow2pad");
        }
    }

    uint64_t offset = 0;
    uint8_t dfmt = tbuffer_dfmt(tbuffer);
    uint dataSize = size_of_format(dfmt);

    for (uint32_t i = 0; i < mipLevel; ++i) {
        uint16_t mipHeight = uint16_t(max(height >> i, 1));
        uint16_t mipDepth = uint16_t(max(depth >> i, 1));
        uint16_t mipPitch = uint16_t(max(pitch >> i, 1));

        // Multiply in 64 bits: the product of five 32-bit factors can exceed
        // 4 GiB for large images and would otherwise wrap before being added
        // to the 64-bit offset.
        offset += uint64_t(arraySliceCount) * uint64_t(dataSize) *
                  uint64_t(mipHeight) * uint64_t(mipPitch) * uint64_t(mipDepth);
    }

    width = uint16_t(max(width >> mipLevel, 1));
    height = uint16_t(max(height >> mipLevel, 1));
    depth = uint16_t(max(depth >> mipLevel, 1));
    pitch = uint16_t(max(pitch >> mipLevel, 1));

    ImageInfo result;
    result.offset = offset;
    result.extent = uvec3(width, height, depth);
    result.pitch = pitch;
    result.baseArraySlice = baseArray;
    result.arraySliceCount = uint16_t(min(arraySliceCount, lastArray - baseArray + 1));
    result.baseMipLevel = baseLevel;
    result.mipLevelCount = uint8_t(lastLevel - baseLevel + 1);
    result.type = type;
    result.dataSize = uint8_t(dataSize);

    return result;
}
|
||||
|
||||
// Resolves the device address of the texel at integer coordinates `pos` in
// mip level `mipLevel` of the image described by `tbuffer`.
//
// Returns kInvalidAddress when `pos` lies outside the level's extent or when
// the texel is not covered by the image memory table.
// NOTE(review): `arrayLayer` is accepted but not used in the address
// computation — confirm whether array slices should be offset here.
uint64_t findImageUnormPixelAddress(int32_t imageMemoryIndexHint, uint32_t tbuffer[8], uint32_t mipLevel, uint32_t arrayLayer, i32vec3 pos) {
    ImageInfo img = getImageInfo(tbuffer, mipLevel);

    // Valid coordinates are [0, extent): a coordinate equal to the extent is
    // already one texel past the end and must be rejected as well.
    if (any(lessThan(pos, ivec3(0))) || any(greaterThanEqual(uvec3(pos), img.extent))) {
        return kInvalidAddress;
    }

    // Compute every stride in 64 bits so large images cannot wrap the
    // 32-bit intermediate products.
    uint64_t texelSize = uint64_t(img.dataSize);
    uint64_t rowSize = texelSize * uint64_t(img.pitch);
    uint64_t sliceSize = rowSize * uint64_t(img.extent.y);

    uint64_t address = tbuffer_base(tbuffer);

    address += img.offset;
    address += texelSize * uint64_t(pos.x);
    address += rowSize * uint64_t(pos.y);
    address += sliceSize * uint64_t(pos.z);

    uint64_t deviceAreaSize = 0;
    uint64_t deviceAddress = findImageMemoryAddress(address, img.dataSize, imageMemoryIndexHint, deviceAreaSize);

    // The mapped area must hold the whole texel.
    if (deviceAddress == kInvalidAddress || deviceAreaSize < img.dataSize) {
        return kInvalidAddress;
    }

    return deviceAddress;
}
|
||||
|
||||
// Resolves the device address of a texel given floating-point coordinates.
//
// When `unorm` is true `pos` already holds texel coordinates and is only
// truncated to integers; otherwise each component is scaled by the
// descriptor's width/height/depth first. `mipLevel` is scaled by the number
// of levels in the descriptor and clamped before the lookup is delegated to
// findImageUnormPixelAddress().
// NOTE(review): the upper clamp is last_level + 1, not last_level — confirm
// that selecting one level past the last is intended.
uint64_t findImagePixelAddress(int32_t imageMemoryIndexHint, uint32_t tbuffer[8], float mipLevel, uint32_t arrayLayer, f32vec3 pos, bool unorm) {
    uint8_t firstMip = tbuffer_base_level(tbuffer);
    uint8_t finalMip = tbuffer_last_level(tbuffer);

    uint32_t level = firstMip + uint32_t((finalMip - firstMip + 1) * mipLevel);
    level = min(level, finalMip + 1);
    level = max(level, firstMip);

    i32vec3 texel;
    if (!unorm) {
        texel.x = int32_t((tbuffer_width(tbuffer) + 1) * pos.x);
        texel.y = int32_t((tbuffer_height(tbuffer) + 1) * pos.y);
        texel.z = int32_t((tbuffer_depth(tbuffer) + 1) * pos.z);
    } else {
        texel = i32vec3(pos);
    }

    return findImageUnormPixelAddress(imageMemoryIndexHint, tbuffer, level, arrayLayer, texel);
}
|
||||
|
||||
// void image_gather4(inout u32vec4 vdata, u32vec4 vaddr, int32_t textureIndexHint, uint32_t tbuffer[8], int32_t samplerIndexHint, u32vec4 samplerDescriptor) {}
|
||||
// image_gather4_cl
|
||||
// image_gather4_l
|
||||
|
|
@ -2685,35 +2846,74 @@ int findTexture3DIndex(int32_t textureIndexHint, uint32_t tbuffer[8]) {
|
|||
// void image_atomic_umin() {}
|
||||
// void image_atomic_xor() {}
|
||||
|
||||
void image_load(inout f32vec4 vdata, i32vec3 vaddr, int32_t textureIndexHint, uint32_t tbuffer[8], uint32_t dmask) {
|
||||
uint8_t textureType = tbuffer_type(tbuffer);
|
||||
f32vec4 result;
|
||||
void image_load(inout u32vec4 vdata, u32vec3 vaddr, int32_t imageBufferIndexHint, uint32_t tbuffer[8], uint32_t flags) {
|
||||
u32vec3 pos = u32vec3(0);
|
||||
|
||||
switch (uint(textureType)) {
|
||||
switch (uint(tbuffer_type(tbuffer))) {
|
||||
case kTextureType1D:
|
||||
case kTextureTypeArray1D:
|
||||
result = texelFetch(textures1D[findTexture1DIndex(textureIndexHint, tbuffer)], vaddr.x, 0);
|
||||
pos.x = vaddr[0];
|
||||
break;
|
||||
|
||||
case kTextureType2D:
|
||||
case kTextureTypeCube:
|
||||
case kTextureTypeArray2D:
|
||||
case kTextureTypeMsaa2D:
|
||||
case kTextureTypeMsaaArray2D:
|
||||
result = texelFetch(textures2D[findTexture2DIndex(textureIndexHint, tbuffer)], vaddr.xy, 0);
|
||||
pos.xy = vaddr.xy;
|
||||
break;
|
||||
|
||||
case kTextureType3D:
|
||||
result = texelFetch(textures3D[findTexture3DIndex(textureIndexHint, tbuffer)], vaddr, 0);
|
||||
pos.xyz = vaddr.xyz;
|
||||
break;
|
||||
|
||||
default:
|
||||
case kTextureTypeMsaa2D:
|
||||
case kTextureTypeMsaaArray2D:
|
||||
debugPrintfEXT("image_load: MSAA");
|
||||
pos.xy = ivec2(vaddr.xy);
|
||||
break;
|
||||
}
|
||||
|
||||
uint64_t deviceAddress = findImageUnormPixelAddress(imageBufferIndexHint, tbuffer, 0, 0, i32vec3(pos));
|
||||
if (deviceAddress == kInvalidAddress) {
|
||||
debugPrintfEXT("image_load: invalid address");
|
||||
return;
|
||||
}
|
||||
|
||||
uint8_t dfmt = tbuffer_dfmt(tbuffer);
|
||||
uint8_t nfmt = tbuffer_nfmt(tbuffer);
|
||||
|
||||
uint data_size = size_of_format(dfmt);
|
||||
uint elements_count = (data_size + SIZEOF(uint32_t) - 1) / SIZEOF(uint32_t);
|
||||
uint channel_count = components_of_format(dfmt);
|
||||
uint channel_size = data_size / channel_count;
|
||||
uint channels_per_element;
|
||||
|
||||
if (data_size > SIZEOF(uint32_t)) {
|
||||
channels_per_element = SIZEOF(uint32_t) / channel_size;
|
||||
} else {
|
||||
channels_per_element = channel_count;
|
||||
}
|
||||
|
||||
u32vec4 result = u32vec4(0);
|
||||
int outIndex = 0;
|
||||
|
||||
for (uint element = 0; element < elements_count; element++) {
|
||||
uint32_t data = MEMORY_DATA_REF(uint32_t, deviceAddress);
|
||||
u32vec4 unpacked = convert_from_format(data, dfmt, nfmt);
|
||||
deviceAddress += SIZEOF(uint32_t);
|
||||
for (int channel = 0; channel < channels_per_element; channel++) {
|
||||
result[outIndex++] = unpacked[channel];
|
||||
}
|
||||
}
|
||||
|
||||
result = swizzle(result,
|
||||
tbuffer_dst_sel_x(tbuffer),
|
||||
tbuffer_dst_sel_y(tbuffer),
|
||||
tbuffer_dst_sel_z(tbuffer),
|
||||
tbuffer_dst_sel_w(tbuffer));
|
||||
|
||||
int vdataIndex = 0;
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
if ((dmask & (1 << i)) != 0) {
|
||||
if ((flags & (1 << i)) != 0) {
|
||||
vdata[vdataIndex++] = result[i];
|
||||
}
|
||||
}
|
||||
|
|
@ -2722,36 +2922,80 @@ void image_load(inout f32vec4 vdata, i32vec3 vaddr, int32_t textureIndexHint, ui
|
|||
// void image_load_pck() {}
|
||||
// void image_load_pck_sgn() {}
|
||||
|
||||
void image_load_mip(inout f32vec4 vdata, u32vec4 vaddr_u, int32_t textureIndexHint, uint32_t tbuffer[8], uint32_t dmask) {
|
||||
uint8_t textureType = tbuffer_type(tbuffer);
|
||||
f32vec4 result;
|
||||
i32vec4 vaddr = i32vec4(vaddr_u);
|
||||
void image_load_mip(inout u32vec4 vdata, u32vec4 vaddr, int32_t imageBufferIndexHint, uint32_t tbuffer[8], uint32_t flags) {
|
||||
u32vec3 pos = u32vec3(0);
|
||||
uint32_t mipLevel = 0;
|
||||
|
||||
switch (uint(textureType)) {
|
||||
switch (uint(tbuffer_type(tbuffer))) {
|
||||
case kTextureType1D:
|
||||
case kTextureTypeArray1D:
|
||||
result = texelFetch(textures1D[findTexture1DIndex(textureIndexHint, tbuffer)], vaddr.x, vaddr.y);
|
||||
pos.x = vaddr[0];
|
||||
mipLevel = vaddr.y;
|
||||
break;
|
||||
|
||||
case kTextureType2D:
|
||||
case kTextureTypeCube:
|
||||
case kTextureTypeArray2D:
|
||||
case kTextureTypeMsaa2D:
|
||||
case kTextureTypeMsaaArray2D:
|
||||
result = texelFetch(textures2D[findTexture2DIndex(textureIndexHint, tbuffer)], vaddr.xy, vaddr.z);
|
||||
pos.xy = vaddr.xy;
|
||||
mipLevel = vaddr.z;
|
||||
break;
|
||||
|
||||
case kTextureType3D:
|
||||
result = texelFetch(textures3D[findTexture3DIndex(textureIndexHint, tbuffer)], vaddr.xyz, vaddr.w);
|
||||
pos.xyz = vaddr.xyz;
|
||||
mipLevel = vaddr.w;
|
||||
break;
|
||||
|
||||
default:
|
||||
case kTextureTypeMsaa2D:
|
||||
case kTextureTypeMsaaArray2D:
|
||||
debugPrintfEXT("image_load_mip: MSAA");
|
||||
pos.xy = ivec2(vaddr.xy);
|
||||
mipLevel = vaddr.z;
|
||||
break;
|
||||
}
|
||||
|
||||
uint64_t deviceAddress = findImageUnormPixelAddress(imageBufferIndexHint, tbuffer, mipLevel, 0, i32vec3(pos));
|
||||
if (deviceAddress == kInvalidAddress) {
|
||||
debugPrintfEXT("image_load_mip: invalid address");
|
||||
return;
|
||||
}
|
||||
|
||||
uint8_t dfmt = tbuffer_dfmt(tbuffer);
|
||||
uint8_t nfmt = tbuffer_nfmt(tbuffer);
|
||||
|
||||
uint data_size = size_of_format(dfmt);
|
||||
uint elements_count = (data_size + SIZEOF(uint32_t) - 1) / SIZEOF(uint32_t);
|
||||
uint channel_count = components_of_format(dfmt);
|
||||
uint channel_size = data_size / channel_count;
|
||||
uint channels_per_element;
|
||||
|
||||
if (data_size > SIZEOF(uint32_t)) {
|
||||
channels_per_element = SIZEOF(uint32_t) / channel_size;
|
||||
} else {
|
||||
channels_per_element = channel_count;
|
||||
}
|
||||
|
||||
u32vec4 result = u32vec4(0);
|
||||
int outIndex = 0;
|
||||
|
||||
for (uint element = 0; element < elements_count; element++) {
|
||||
uint32_t data = MEMORY_DATA_REF(uint32_t, deviceAddress);
|
||||
u32vec4 unpacked = convert_from_format(data, dfmt, nfmt);
|
||||
deviceAddress += SIZEOF(uint32_t);
|
||||
// debugPrintfEXT("image_load_mip: data: %x, unpacked: %v4x, element: %u, channels: %u", data, unpacked, element, channels_per_element);
|
||||
for (int channel = 0; channel < channels_per_element; channel++) {
|
||||
result[outIndex++] = unpacked[channel];
|
||||
}
|
||||
}
|
||||
|
||||
result = swizzle(result,
|
||||
tbuffer_dst_sel_x(tbuffer),
|
||||
tbuffer_dst_sel_y(tbuffer),
|
||||
tbuffer_dst_sel_z(tbuffer),
|
||||
tbuffer_dst_sel_w(tbuffer));
|
||||
|
||||
int vdataIndex = 0;
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
if ((dmask & (1 << i)) != 0) {
|
||||
if ((flags & (1 << i)) != 0) {
|
||||
vdata[vdataIndex++] = result[i];
|
||||
}
|
||||
}
|
||||
|
|
@ -2759,10 +3003,203 @@ void image_load_mip(inout f32vec4 vdata, u32vec4 vaddr_u, int32_t textureIndexHi
|
|||
|
||||
// void image_load_mip_pck() {}
|
||||
// void image_load_mip_pck_sgn() {}
|
||||
// void image_store() {}
|
||||
// void image_store_pck() {}
|
||||
// void image_store_mip() {}
|
||||
// void image_store_mip_pck() {}
|
||||
|
||||
// Writes one texel of vdata to mip level 0 of the image described by tbuffer.
//
// The number of coordinate components taken from vaddr depends on the
// descriptor's texture type. The value is packed dword by dword through
// convert_to_format using the descriptor's data and number formats.
// Nothing is written when the texel address cannot be resolved.
void image_store(u32vec4 vdata, u32vec4 vaddr, int32_t imageBufferIndexHint, uint32_t tbuffer[8], uint32_t flags) {
    u32vec3 texel = u32vec3(0);

    switch (uint(tbuffer_type(tbuffer))) {
    case kTextureType1D:
    case kTextureTypeArray1D:
        texel.x = vaddr.x;
        break;

    case kTextureType2D:
    case kTextureTypeCube:
    case kTextureTypeArray2D:
        texel.xy = vaddr.xy;
        break;

    case kTextureType3D:
        texel = vaddr.xyz;
        break;

    case kTextureTypeMsaa2D:
    case kTextureTypeMsaaArray2D:
        // MSAA is only reported; the store continues with the x/y coordinates.
        debugPrintfEXT("image_store: MSAA");
        texel.xy = vaddr.xy;
        break;
    }

    uint64_t address = findImageUnormPixelAddress(imageBufferIndexHint, tbuffer, 0, 0, i32vec3(texel));
    if (address == kInvalidAddress) {
        debugPrintfEXT("image_store: invalid address");
        return;
    }

    // A partial dmask is only reported; all components are stored regardless.
    if ((flags & kImageFlagDmask) != 0xf) {
        debugPrintfEXT("image_store: unexpected dmask. flags %x", flags);
    }

    uint8_t dfmt = tbuffer_dfmt(tbuffer);
    uint8_t nfmt = tbuffer_nfmt(tbuffer);

    // Number of 32 bit words covered by one texel, rounded up.
    uint texelSize = size_of_format(dfmt);
    uint dwordCount = (texelSize + SIZEOF(uint32_t) - 1) / SIZEOF(uint32_t);

    uint dword = 0;
    while (dword < dwordCount) {
        uint32_t packed = convert_to_format(dword, vdata, dfmt, nfmt);
        MEMORY_DATA_REF(uint32_t, address) = packed;
        address += SIZEOF(uint32_t);
        ++dword;
    }
}
|
||||
// Writes one texel to mip level 0 of the image described by tbuffer without
// format conversion: the components of vdata are copied to memory as raw
// 32 bit words, one per dword covered by the descriptor's data format.
//
// The number of coordinate components taken from vaddr depends on the
// descriptor's texture type. Nothing is written when the texel address
// cannot be resolved.
void image_store_pck(u32vec4 vdata, u32vec4 vaddr, int32_t imageBufferIndexHint, uint32_t tbuffer[8], uint32_t flags) {
    u32vec3 pos = u32vec3(0);

    switch (uint(tbuffer_type(tbuffer))) {
    case kTextureType1D:
    case kTextureTypeArray1D:
        pos.x = vaddr[0];
        break;

    case kTextureType2D:
    case kTextureTypeCube:
    case kTextureTypeArray2D:
        pos.xy = vaddr.xy;
        break;

    case kTextureType3D:
        pos.xyz = vaddr.xyz;
        break;

    case kTextureTypeMsaa2D:
    case kTextureTypeMsaaArray2D:
        // MSAA is only reported; the store continues with the x/y coordinates.
        debugPrintfEXT("image_store_pck: MSAA");
        pos.xy = vaddr.xy;
        break;
    }

    uint64_t deviceAddress = findImageUnormPixelAddress(imageBufferIndexHint, tbuffer, 0, 0, i32vec3(pos));
    if (deviceAddress == kInvalidAddress) {
        debugPrintfEXT("image_store_pck: invalid address");
        return;
    }

    // A partial dmask is only reported; all words are stored regardless.
    if ((flags & kImageFlagDmask) != 0xf) {
        debugPrintfEXT("image_store_pck: unexpected dmask. flags %x", flags);
    }

    uint8_t dfmt = tbuffer_dfmt(tbuffer);

    // Number of 32 bit words covered by one texel, rounded up.
    uint data_size = size_of_format(dfmt);
    uint elements_count = (data_size + SIZEOF(uint32_t) - 1) / SIZEOF(uint32_t);

    for (uint element = 0; element < elements_count; element++) {
        MEMORY_DATA_REF(uint32_t, deviceAddress) = vdata[element];
        deviceAddress += SIZEOF(uint32_t);
    }
}
|
||||
// Writes one texel of vdata to an explicit mip level of the image described
// by tbuffer.
//
// The texture type decides how vaddr is split: the leading components are the
// texel coordinates and the component right after them is the mip level.
// The value is packed dword by dword through convert_to_format using the
// descriptor's data and number formats. Nothing is written when the texel
// address cannot be resolved.
void image_store_mip(u32vec4 vdata, u32vec4 vaddr, int32_t imageBufferIndexHint, uint32_t tbuffer[8], uint32_t flags) {
    u32vec3 texel = u32vec3(0);
    uint32_t level = 0;

    switch (uint(tbuffer_type(tbuffer))) {
    case kTextureType1D:
    case kTextureTypeArray1D:
        texel.x = vaddr.x;
        level = vaddr.y;
        break;

    case kTextureType2D:
    case kTextureTypeCube:
    case kTextureTypeArray2D:
        texel.xy = vaddr.xy;
        level = vaddr.z;
        break;

    case kTextureType3D:
        texel = vaddr.xyz;
        level = vaddr.w;
        break;

    case kTextureTypeMsaa2D:
    case kTextureTypeMsaaArray2D:
        // MSAA is only reported; the store continues like the 2D case.
        debugPrintfEXT("image_store_mip: MSAA");
        texel.xy = vaddr.xy;
        level = vaddr.z;
        break;
    }

    uint64_t address = findImageUnormPixelAddress(imageBufferIndexHint, tbuffer, level, 0, i32vec3(texel));
    if (address == kInvalidAddress) {
        debugPrintfEXT("image_store_mip: invalid address");
        return;
    }

    // A partial dmask is only reported; all components are stored regardless.
    if ((flags & kImageFlagDmask) != 0xf) {
        debugPrintfEXT("image_store_mip: unexpected dmask. flags %x", flags);
    }

    uint8_t dfmt = tbuffer_dfmt(tbuffer);
    uint8_t nfmt = tbuffer_nfmt(tbuffer);

    // Number of 32 bit words covered by one texel, rounded up.
    uint texelSize = size_of_format(dfmt);
    uint dwordCount = (texelSize + SIZEOF(uint32_t) - 1) / SIZEOF(uint32_t);

    uint dword = 0;
    while (dword < dwordCount) {
        uint32_t packed = convert_to_format(dword, vdata, dfmt, nfmt);
        MEMORY_DATA_REF(uint32_t, address) = packed;
        address += SIZEOF(uint32_t);
        ++dword;
    }
}
|
||||
// Writes one texel to an explicit mip level of the image described by tbuffer
// without format conversion: the components of vdata are copied to memory as
// raw 32 bit words, one per dword covered by the descriptor's data format.
//
// The texture type decides how vaddr is split: the leading components are the
// texel coordinates and the component right after them is the mip level.
// Nothing is written for MSAA images or when the texel address cannot be
// resolved.
void image_store_mip_pck(u32vec4 vdata, u32vec4 vaddr, int32_t imageBufferIndexHint, uint32_t tbuffer[8], uint32_t flags) {
    u32vec3 texel = u32vec3(0);
    uint32_t level = 0;

    switch (uint(tbuffer_type(tbuffer))) {
    case kTextureType1D:
    case kTextureTypeArray1D:
        texel.x = vaddr.x;
        level = vaddr.y;
        break;

    case kTextureType2D:
    case kTextureTypeCube:
    case kTextureTypeArray2D:
        texel.xy = vaddr.xy;
        level = vaddr.z;
        break;

    case kTextureType3D:
        texel = vaddr.xyz;
        level = vaddr.w;
        break;

    case kTextureTypeMsaa2D:
    case kTextureTypeMsaaArray2D:
        // NOTE(review): unlike image_store / image_store_pck / image_store_mip,
        // which continue with vaddr.xy, this variant drops MSAA stores -
        // confirm the early return is intended.
        debugPrintfEXT("image_store_mip_pck: MSAA");
        return;
    }

    uint64_t address = findImageUnormPixelAddress(imageBufferIndexHint, tbuffer, level, 0, i32vec3(texel));
    if (address == kInvalidAddress) {
        debugPrintfEXT("image_store_mip_pck: invalid address");
        return;
    }

    // A partial dmask is only reported; all words are stored regardless.
    if ((flags & kImageFlagDmask) != 0xf) {
        debugPrintfEXT("image_store_mip_pck: unexpected dmask. flags %x", flags);
    }

    uint8_t dfmt = tbuffer_dfmt(tbuffer);

    // Number of 32 bit words covered by one texel, rounded up.
    uint texelSize = size_of_format(dfmt);
    uint dwordCount = (texelSize + SIZEOF(uint32_t) - 1) / SIZEOF(uint32_t);

    uint dword = 0;
    while (dword < dwordCount) {
        MEMORY_DATA_REF(uint32_t, address) = vdata[dword];
        address += SIZEOF(uint32_t);
        ++dword;
    }
}
|
||||
|
||||
void image_sample(inout f32vec4 vdata, f32vec3 vaddr, int32_t textureIndexHint, uint32_t tbuffer[8], int32_t samplerIndexHint, u32vec4 ssampler, uint32_t dmask) {
|
||||
uint8_t textureType = tbuffer_type(tbuffer);
|
||||
|
|
@ -2980,38 +3417,8 @@ void image_get_lod(inout f32vec2 vdata, u32vec3 vaddr, int32_t textureIndexHint,
|
|||
}
|
||||
|
||||
void image_get_resinfo(inout u32vec4 vdata, int32_t vmipid, int32_t textureIndexHint, uint32_t tbuffer[8], uint32_t dmask) {
|
||||
i32vec4 result = i32vec4(1);
|
||||
|
||||
switch (uint(tbuffer_type(tbuffer))) {
|
||||
case kTextureType1D: {
|
||||
int texIndex = findTexture1DIndex(textureIndexHint, tbuffer);
|
||||
result.x = textureSize(textures1D[texIndex], vmipid);
|
||||
result.w = textureQueryLevels(textures1D[texIndex]);
|
||||
break;
|
||||
}
|
||||
|
||||
case kTextureTypeArray1D:
|
||||
case kTextureType2D:
|
||||
case kTextureTypeCube:
|
||||
case kTextureTypeArray2D: {
|
||||
int texIndex = findTexture2DIndex(textureIndexHint, tbuffer);
|
||||
result.xy = textureSize(textures2D[texIndex], vmipid);
|
||||
result.w = textureQueryLevels(textures2D[texIndex]);
|
||||
break;
|
||||
}
|
||||
|
||||
case kTextureTypeMsaa2D:
|
||||
case kTextureTypeMsaaArray2D:
|
||||
result.xy = textureSize(textures2D[findTexture2DIndex(textureIndexHint, tbuffer)], 0);
|
||||
break;
|
||||
|
||||
case kTextureType3D: {
|
||||
int texIndex = findTexture3DIndex(textureIndexHint, tbuffer);
|
||||
result.xyz = textureSize(textures3D[texIndex], vmipid);
|
||||
result.w = textureQueryLevels(textures3D[texIndex]);
|
||||
break;
|
||||
}
|
||||
}
|
||||
ImageInfo img = getImageInfo(tbuffer, vmipid);
|
||||
i32vec4 result = i32vec4(img.extent, img.mipLevelCount);
|
||||
|
||||
int vdataIndex = 0;
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
|
|
|
|||
|
|
@ -102,6 +102,10 @@ struct ResourcesBuilder {
|
|||
p.resourceSlot = resources.slots++;
|
||||
resources.textures.push_back(p);
|
||||
}
|
||||
/// Registers an image buffer resource: assigns it the next free resource
/// slot and appends it to the collected image buffers.
void addImageBuffer(gcn::Resources::ImageBuffer p) {
  const auto slot = resources.slots++;
  p.resourceSlot = slot;
  resources.imageBuffers.push_back(p);
}
|
||||
void addBuffer(gcn::Resources::Buffer p) {
|
||||
p.resourceSlot = resources.slots++;
|
||||
resources.buffers.push_back(p);
|
||||
|
|
@ -334,6 +338,27 @@ struct ResourcesBuilder {
|
|||
continue;
|
||||
}
|
||||
|
||||
if (inst == ir::amdgpu::IMAGE_BUFFER) {
|
||||
auto access = static_cast<Access>(*inst.getOperand(1).getAsInt32());
|
||||
auto words = inst.getOperands().subspan(2);
|
||||
if (words.size() > 4) {
|
||||
addImageBuffer({
|
||||
.access = access,
|
||||
.words = {words[0].getAsValue(), words[1].getAsValue(),
|
||||
words[2].getAsValue(), words[3].getAsValue(),
|
||||
words[4].getAsValue(), words[5].getAsValue(),
|
||||
words[6].getAsValue(), words[7].getAsValue()},
|
||||
});
|
||||
} else {
|
||||
addImageBuffer({
|
||||
.access = access,
|
||||
.words = {words[0].getAsValue(), words[1].getAsValue(),
|
||||
words[2].getAsValue(), words[3].getAsValue()},
|
||||
});
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
if (inst == ir::amdgpu::SAMPLER) {
|
||||
auto words = inst.getOperands().subspan(1);
|
||||
auto unorm = *inst.getOperand(5).getAsBool();
|
||||
|
|
@ -408,6 +433,20 @@ void gcn::Resources::print(std::ostream &os, ir::NameStorage &ns) const {
|
|||
}
|
||||
}
|
||||
|
||||
if (!imageBuffers.empty()) {
|
||||
os << "image buffers:\n";
|
||||
for (auto &buffer : buffers) {
|
||||
os << " #" << buffer.resourceSlot << ":\n";
|
||||
printAccess(buffer.access);
|
||||
|
||||
for (auto &word : buffer.words) {
|
||||
os << " word" << (&word - buffer.words) << ": ";
|
||||
printFlat(os, word, ns);
|
||||
os << "\n";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!buffers.empty()) {
|
||||
os << "buffers:\n";
|
||||
for (auto &buffer : buffers) {
|
||||
|
|
@ -876,7 +915,8 @@ static void instructionsToSpv(GcnConverter &converter, gcn::Import &importer,
|
|||
}
|
||||
|
||||
if (inst == ir::amdgpu::POINTER || inst == ir::amdgpu::VBUFFER ||
|
||||
inst == ir::amdgpu::SAMPLER || inst == ir::amdgpu::TBUFFER) {
|
||||
inst == ir::amdgpu::SAMPLER || inst == ir::amdgpu::TBUFFER ||
|
||||
inst == ir::amdgpu::IMAGE_BUFFER) {
|
||||
toAnalyze.push_back(inst.staticCast<ir::Value>());
|
||||
continue;
|
||||
}
|
||||
|
|
@ -1216,7 +1256,7 @@ static void instructionsToSpv(GcnConverter &converter, gcn::Import &importer,
|
|||
auto spvFnCall = builder.createSpvFunctionCall(
|
||||
inst.getLocation(), inst.getOperand(0).getAsValue(), function);
|
||||
|
||||
for (auto arg : inst.getOperands().subspan(1)) {
|
||||
for (auto &arg : inst.getOperands().subspan(1)) {
|
||||
spvFnCall.addOperand(arg);
|
||||
}
|
||||
|
||||
|
|
@ -1622,6 +1662,13 @@ static void createInitialValues(GcnConverter &converter,
|
|||
stage, builder, info.create(gcn::ConfigType::MemoryTable, word)));
|
||||
}
|
||||
|
||||
for (int word = 0; word < 2; ++word) {
|
||||
context.writeReg(loc, builder, gcn::RegId::ImageMemoryTable, word,
|
||||
converter.createReadConfig(
|
||||
stage, builder,
|
||||
info.create(gcn::ConfigType::ImageMemoryTable, word)));
|
||||
}
|
||||
|
||||
for (int word = 0; word < 2; ++word) {
|
||||
context.writeReg(
|
||||
loc, builder, gcn::RegId::Gds, word,
|
||||
|
|
|
|||
|
|
@ -557,6 +557,11 @@ readMimgInst(GcnInstruction &inst, std::uint64_t &address,
|
|||
auto srsrc = fetchMaskedValue(words[1], srsrcMask) << 2;
|
||||
auto ssamp = fetchMaskedValue(words[1], ssampMask) << 2;
|
||||
|
||||
const uint32_t kImageFlagR128 = 1 << 4;
|
||||
const uint32_t kImageFlagDA = 1 << 5;
|
||||
const uint32_t kImageFlagUnorm = 1 << 6;
|
||||
const uint32_t kImageFlagTFE = 1 << 7;
|
||||
|
||||
std::uint8_t textureAccess = 0;
|
||||
bool hasSampler = false;
|
||||
|
||||
|
|
@ -583,14 +588,32 @@ readMimgInst(GcnInstruction &inst, std::uint64_t &address,
|
|||
inst.addOperand(createVgprGcnOperand(vdata).withRW());
|
||||
inst.addOperand(createVgprGcnOperand(vaddr).withR());
|
||||
auto tbufferStart = createSgprGcnOperand(address, srsrc);
|
||||
inst.addOperand(
|
||||
GcnOperand::createTexture(tbufferStart, r128).withAccess(textureAccess));
|
||||
inst.addOperand(tbufferStart);
|
||||
|
||||
if (hasSampler) {
|
||||
inst.addOperand(GcnOperand::createTexture(tbufferStart, r128)
|
||||
.withAccess(textureAccess));
|
||||
inst.addOperand(tbufferStart);
|
||||
|
||||
auto samplerStart = createSgprGcnOperand(address, ssamp);
|
||||
inst.addOperand(GcnOperand::createSampler(samplerStart, unrm).withR());
|
||||
inst.addOperand(samplerStart);
|
||||
} else {
|
||||
inst.addOperand(GcnOperand::createImageBuffer(tbufferStart, r128)
|
||||
.withAccess(textureAccess));
|
||||
inst.addOperand(tbufferStart);
|
||||
}
|
||||
|
||||
if (r128) {
|
||||
dmask |= kImageFlagR128;
|
||||
}
|
||||
if (da) {
|
||||
dmask |= kImageFlagDA;
|
||||
}
|
||||
if (unrm) {
|
||||
dmask |= kImageFlagUnorm;
|
||||
}
|
||||
if (tfe) {
|
||||
dmask |= kImageFlagTFE;
|
||||
}
|
||||
|
||||
inst.addOperand(GcnOperand::createConstant(dmask));
|
||||
|
|
@ -982,6 +1005,20 @@ void GcnOperand::print(std::ostream &os) const {
|
|||
getUnderlyingOperand(7).print(os);
|
||||
os << "}";
|
||||
break;
|
||||
case Kind::ImageBuffer128:
|
||||
os << "buffer T#{";
|
||||
getUnderlyingOperand(0).print(os);
|
||||
os << "..";
|
||||
getUnderlyingOperand(3).print(os);
|
||||
os << "}";
|
||||
break;
|
||||
case Kind::ImageBuffer256:
|
||||
os << "buffer T#{";
|
||||
getUnderlyingOperand(0).print(os);
|
||||
os << "..";
|
||||
getUnderlyingOperand(7).print(os);
|
||||
os << "}";
|
||||
break;
|
||||
case Kind::Pointer:
|
||||
os << "ptr{";
|
||||
getUnderlyingOperand(0).print(os);
|
||||
|
|
|
|||
|
|
@ -132,6 +132,7 @@ static std::unordered_set<ir::InstructionId> g_instsWithoutSideEffects = {
|
|||
ir::getInstructionId(ir::amdgpu::VBUFFER),
|
||||
ir::getInstructionId(ir::amdgpu::SAMPLER),
|
||||
ir::getInstructionId(ir::amdgpu::TBUFFER),
|
||||
ir::getInstructionId(ir::amdgpu::IMAGE_BUFFER),
|
||||
ir::getInstructionId(ir::amdgpu::POINTER),
|
||||
ir::getInstructionId(ir::amdgpu::PS_INPUT_VGPR),
|
||||
ir::getInstructionId(ir::amdgpu::PS_COMP_SWAP),
|
||||
|
|
|
|||
|
|
@ -9,6 +9,7 @@
|
|||
#include "spv.hpp"
|
||||
#include "transform.hpp"
|
||||
|
||||
#include <print>
|
||||
#include <rx/die.hpp>
|
||||
|
||||
#include <SPIRV/GlslangToSpv.h>
|
||||
|
|
@ -121,6 +122,14 @@ inline shader::spv::TypeInfo getRegisterInfo(unsigned id) {
|
|||
.componentsCount = 2,
|
||||
};
|
||||
|
||||
case gcn::RegId::ImageMemoryTable:
|
||||
return {
|
||||
.baseType = ir::spv::OpTypeVector,
|
||||
.componentType = ir::spv::OpTypeInt,
|
||||
.componentWidth = 32,
|
||||
.componentsCount = 2,
|
||||
};
|
||||
|
||||
case gcn::RegId::Gds:
|
||||
return {
|
||||
.baseType = ir::spv::OpTypeVector,
|
||||
|
|
@ -161,6 +170,8 @@ inline const char *getRegisterName(unsigned id) {
|
|||
return "thread_id";
|
||||
case gcn::MemoryTable:
|
||||
return "memory_table";
|
||||
case gcn::ImageMemoryTable:
|
||||
return "image_memory_table";
|
||||
case gcn::Gds:
|
||||
return "gds";
|
||||
}
|
||||
|
|
@ -190,6 +201,8 @@ static std::optional<gcn::RegId> getRegIdByName(std::string_view variableName) {
|
|||
return gcn::RegId::ThreadId;
|
||||
if (variableName == "memory_table")
|
||||
return gcn::RegId::MemoryTable;
|
||||
if (variableName == "image_memory_table")
|
||||
return gcn::RegId::ImageMemoryTable;
|
||||
if (variableName == "gds")
|
||||
return gcn::RegId::Gds;
|
||||
|
||||
|
|
@ -825,6 +838,8 @@ static ir::Value deserializeGcnRegion(
|
|||
case GcnOperand::Kind::Buffer:
|
||||
case GcnOperand::Kind::Texture128:
|
||||
case GcnOperand::Kind::Texture256:
|
||||
case GcnOperand::Kind::ImageBuffer128:
|
||||
case GcnOperand::Kind::ImageBuffer256:
|
||||
case GcnOperand::Kind::Sampler:
|
||||
case GcnOperand::Kind::Pointer:
|
||||
break;
|
||||
|
|
@ -849,6 +864,37 @@ static ir::Value deserializeGcnRegion(
|
|||
op.getUnderlyingOperand(2)),
|
||||
createOperandReadImpl(loc, builder, uint32TV,
|
||||
op.getUnderlyingOperand(3)));
|
||||
case GcnOperand::Kind::ImageBuffer128:
|
||||
return builder.createValue(
|
||||
loc, ir::amdgpu::IMAGE_BUFFER, type, op.access,
|
||||
createOperandReadImpl(loc, builder, uint32TV,
|
||||
op.getUnderlyingOperand(0)),
|
||||
createOperandReadImpl(loc, builder, uint32TV,
|
||||
op.getUnderlyingOperand(1)),
|
||||
createOperandReadImpl(loc, builder, uint32TV,
|
||||
op.getUnderlyingOperand(2)),
|
||||
createOperandReadImpl(loc, builder, uint32TV,
|
||||
op.getUnderlyingOperand(3)));
|
||||
|
||||
case GcnOperand::Kind::ImageBuffer256:
|
||||
return builder.createValue(
|
||||
loc, ir::amdgpu::IMAGE_BUFFER, type, op.access,
|
||||
createOperandReadImpl(loc, builder, uint32TV,
|
||||
op.getUnderlyingOperand(0)),
|
||||
createOperandReadImpl(loc, builder, uint32TV,
|
||||
op.getUnderlyingOperand(1)),
|
||||
createOperandReadImpl(loc, builder, uint32TV,
|
||||
op.getUnderlyingOperand(2)),
|
||||
createOperandReadImpl(loc, builder, uint32TV,
|
||||
op.getUnderlyingOperand(3)),
|
||||
createOperandReadImpl(loc, builder, uint32TV,
|
||||
op.getUnderlyingOperand(4)),
|
||||
createOperandReadImpl(loc, builder, uint32TV,
|
||||
op.getUnderlyingOperand(5)),
|
||||
createOperandReadImpl(loc, builder, uint32TV,
|
||||
op.getUnderlyingOperand(6)),
|
||||
createOperandReadImpl(loc, builder, uint32TV,
|
||||
op.getUnderlyingOperand(7)));
|
||||
|
||||
case GcnOperand::Kind::Texture128:
|
||||
return builder.createValue(
|
||||
|
|
@ -1381,8 +1427,8 @@ static ir::Value deserializeGcnRegion(
|
|||
converter.getTypePointer(ir::spv::StorageClass::Function, paramType),
|
||||
ir::spv::StorageClass::Function);
|
||||
|
||||
auto result = createOperandRead(loc, builder, paramType, op);
|
||||
builder.createSpvStore(loc, arg, result);
|
||||
auto result = createOperandRead(loc, builder, paramType, op);
|
||||
builder.createSpvStore(loc, arg, result);
|
||||
|
||||
callArgs.push_back(arg);
|
||||
}
|
||||
|
|
@ -1631,10 +1677,9 @@ ir::Node gcn::Import::getOrCloneImpl(ir::Context &context, ir::Node node,
|
|||
if (shader::spv::getTypeInfo(
|
||||
inst.getOperand(0).getAsValue().getOperand(1).getAsValue()) !=
|
||||
getRegisterInfo(*regId)) {
|
||||
std::fprintf(stderr,
|
||||
"unexpected type for register variable "
|
||||
"'%s', expected %u\n",
|
||||
name->c_str(), getRegisterInfo(*regId).width());
|
||||
std::println(
|
||||
stderr, "unexpected type for register variable '{}', expected {}",
|
||||
*name, getRegisterInfo(*regId).width());
|
||||
std::abort();
|
||||
}
|
||||
|
||||
|
|
@ -1799,7 +1844,7 @@ gcn::deserialize(gcn::Context &context, const gcn::Environment &environment,
|
|||
.createSpvBranch(child.getLocation(), regionEntry);
|
||||
child.remove();
|
||||
} else {
|
||||
std::fprintf(stderr, "failed to evaluate branch!\n");
|
||||
std::println(stderr, "failed to evaluate branch!");
|
||||
}
|
||||
context.requiredUserSgprs |= evaluator.usedUserSgprs;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -3,6 +3,9 @@
|
|||
layout (triangles, invocations = 1) in;
|
||||
layout (triangle_strip, max_vertices = 4) out;
|
||||
|
||||
layout (location=0) in vec4 inp[3];
|
||||
layout (location=0) out vec4 outp;
|
||||
|
||||
void main(void)
|
||||
{
|
||||
vec4 topLeft = gl_in[0].gl_Position;
|
||||
|
|
@ -23,15 +26,19 @@ void main(void)
|
|||
topLeft.w
|
||||
);
|
||||
|
||||
outp = inp[0];
|
||||
gl_Position = topLeft;
|
||||
EmitVertex();
|
||||
|
||||
outp = inp[2];
|
||||
gl_Position = bottomLeft;
|
||||
EmitVertex();
|
||||
|
||||
outp = vec4(inp[1].x, inp[0].y, inp[0].z, inp[0].w);
|
||||
gl_Position = topRight;
|
||||
EmitVertex();
|
||||
|
||||
outp = vec4(inp[1].x, inp[2].y, inp[0].z, inp[0].w);
|
||||
gl_Position = bottomRight;
|
||||
EmitVertex();
|
||||
|
||||
|
|
|
|||
|
|
@ -91,6 +91,8 @@ static orbis::ErrorCode gc_ioctl(orbis::File *file, std::uint64_t request,
|
|||
orbis::g_currentThread->tproc->vmId,
|
||||
{args->cmds + i * 4, 4});
|
||||
}
|
||||
|
||||
// gpu.waitForIdle();
|
||||
} else {
|
||||
return orbis::ErrorCode::BUSY;
|
||||
}
|
||||
|
|
@ -105,6 +107,7 @@ static orbis::ErrorCode gc_ioctl(orbis::File *file, std::uint64_t request,
|
|||
|
||||
auto args = reinterpret_cast<Args *>(argp);
|
||||
if (auto gpu = amdgpu::DeviceCtl{orbis::g_context.gpuDevice}) {
|
||||
gpu.waitForIdle();
|
||||
gpu.submitSwitchBuffer(orbis::g_currentThread->tproc->vmId);
|
||||
} else {
|
||||
return orbis::ErrorCode::BUSY;
|
||||
|
|
@ -132,18 +135,22 @@ static orbis::ErrorCode gc_ioctl(orbis::File *file, std::uint64_t request,
|
|||
{args->cmds + i * 4, 4});
|
||||
}
|
||||
|
||||
// ORBIS_LOG_ERROR("submit and write eop", args->eopValue, args->waitFlag);
|
||||
// ORBIS_LOG_ERROR("submit and write eop", args->eopValue,
|
||||
// args->waitFlag);
|
||||
gpu.submitWriteEop(gcFile->gfxPipe, args->waitFlag, args->eopValue);
|
||||
} else {
|
||||
return orbis::ErrorCode::BUSY;
|
||||
}
|
||||
|
||||
// orbis::bridge.sendDoFlip();
|
||||
break;
|
||||
}
|
||||
|
||||
case 0xc0048116: { // submit done?
|
||||
break;
|
||||
if (auto gpu = amdgpu::DeviceCtl{orbis::g_context.gpuDevice}) {
|
||||
gpu.waitForIdle();
|
||||
} else {
|
||||
return orbis::ErrorCode::BUSY;
|
||||
}
|
||||
}
|
||||
|
||||
case 0xc0048117:
|
||||
|
|
@ -243,7 +250,7 @@ static orbis::ErrorCode gc_ioctl(orbis::File *file, std::uint64_t request,
|
|||
|
||||
if (auto gpu = amdgpu::DeviceCtl{orbis::g_context.gpuDevice}) {
|
||||
gpu.submitComputeQueue(args->meId, args->pipeId, args->queueId,
|
||||
args->nextStartOffsetInDw);
|
||||
args->nextStartOffsetInDw);
|
||||
} else {
|
||||
return orbis::ErrorCode::BUSY;
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in a new issue