[amdgpu] shader: implement storage images

Miscellaneous instruction fixes
This commit is contained in:
DH 2023-08-06 17:26:24 +03:00
parent 72c7940259
commit 46afeb987e
13 changed files with 429 additions and 63 deletions

View file

@ -319,6 +319,9 @@ public:
Region() = default;
// Constructs a region and reserves capacity in mData for expInstCount
// elements up front.
// NOTE(review): the parameter is named as an instruction count, while data()
// exposes mData as 32-bit words — confirm which unit callers intend.
Region(std::size_t expInstCount) { mData.reserve(expInstCount); }
// Returns true when the raw value of `id` is present in mIdDefs.
bool isIdDefined(Id id) const {
  const auto rawId = id.id;
  return mIdDefs.contains(rawId);
}
// Returns true when the raw value of `id` is present in mIdUses.
bool isIdUsed(Id id) const {
  const auto rawId = id.id;
  return mIdUses.contains(rawId);
}
void clear() { mData.clear(); }
// Read-only pointer to the first 32-bit word stored in mData (whatever
// mData.data() returns for the current contents).
const std::uint32_t *data() const { return mData.data(); }
@ -409,8 +412,8 @@ public:
BlockBuilder() = default;
BlockBuilder(IdGenerator &idGenerator, Block id,
std::size_t expInstructionsCount)
: mIdGenerator(&idGenerator), bodyRegion{expInstructionsCount},
terminatorRegion{1}, id(id) {}
: mIdGenerator(&idGenerator), id(id), bodyRegion{expInstructionsCount},
terminatorRegion{1} {}
void moveBlock(BlockBuilder &&other) {
prefix.pushRegion(other.prefix);
@ -1534,6 +1537,53 @@ public:
return id;
}
// Appends an OpImageRead instruction to bodyRegion and returns the newly
// allocated result id.
//
// resultType - type id recorded as the instruction's result type
// image      - image value to read from
// coords     - coordinate value
// operands   - optional image operands mask; when it is MaskNone neither the
//              mask word nor `args` are emitted
// args       - ids appended after the mask word, one word each
VectorOfValue<FloatType> createImageRead(
    VectorOfType<FloatType> resultType, ImageValue image,
    ScalarOrVectorOfValue<UIntType> coords,
    spv::ImageOperandsMask operands = spv::ImageOperandsMask::MaskNone,
    std::span<const Id> args = {}) {
  const bool hasOperands = operands != spv::ImageOperandsMask::MaskNone;

  // Fixed part: four operands are pushed below (result type, result id, image,
  // coordinate); the fifth word is presumably the instruction header.
  std::size_t wordCount = 5;
  if (hasOperands) {
    wordCount += 1 + args.size();
  }

  auto inst = bodyRegion.pushOp(spv::Op::OpImageRead, wordCount);
  auto resultId = newId<VectorOfValue<FloatType>>();
  inst.pushIdUse(resultType);
  inst.pushIdDef(resultId);
  inst.pushIdUse(image);
  inst.pushIdUse(coords);

  if (!hasOperands) {
    return resultId;
  }

  inst.pushWord(static_cast<unsigned>(operands));
  for (auto extraId : args) {
    inst.pushIdUse(extraId);
  }
  return resultId;
}
// Appends an OpImageWrite instruction to bodyRegion. The instruction has no
// result id.
//
// image    - image value to write to
// coords   - coordinate value
// texel    - value to store
// operands - optional image operands mask; when it is MaskNone neither the
//            mask word nor `args` are emitted
// args     - ids appended after the mask word, one word each
void createImageWrite(
    ImageValue image, ScalarOrVectorOfValue<UIntType> coords, Value texel,
    spv::ImageOperandsMask operands = spv::ImageOperandsMask::MaskNone,
    std::span<const Id> args = {}) {
  const bool hasOperands = operands != spv::ImageOperandsMask::MaskNone;

  // Fixed part: three operands are pushed below (image, coordinate, texel);
  // the fourth word is presumably the instruction header.
  std::size_t wordCount = 4;
  if (hasOperands) {
    wordCount += 1 + args.size();
  }

  auto inst = bodyRegion.pushOp(spv::Op::OpImageWrite, wordCount);
  inst.pushIdUse(image);
  inst.pushIdUse(coords);
  inst.pushIdUse(texel);

  if (!hasOperands) {
    return;
  }

  inst.pushWord(static_cast<unsigned>(operands));
  for (auto extraId : args) {
    inst.pushIdUse(extraId);
  }
}
Value createImageQuerySizeLod(Type resultType, ImageValue image, Value lod) {
auto region = bodyRegion.pushOp(spv::Op::OpImageQuerySizeLod, 5);
auto id = newId<Value>();
@ -1655,6 +1705,38 @@ private:
SpirvBuilder &operator=(SpirvBuilder &&) = default;
public:
// Reports whether `id` is recorded as defined in one of the searched regions.
// At the moment only functionRegion is searched; the remaining module regions
// (capabilityRegion, extensionRegion, extInstRegion, memoryModelRegion,
// entryPointRegion, executionModeRegion, debugRegion, annotationRegion,
// globalRegion) are deliberately left out of the list.
bool isIdDefined(Id id) const {
  const std::array searchedRegions = {
      &functionRegion,
  };

  bool defined = false;
  for (auto it = searchedRegions.begin();
       !defined && it != searchedRegions.end(); ++it) {
    defined = (*it)->isIdDefined(id);
  }
  return defined;
}
// Reports whether `id` is recorded as used in any of the builder's regions
// listed below. The search stops at the first region that reports a use.
bool isIdUsed(Id id) const {
  const std::array searchedRegions = {
      &capabilityRegion,   &extensionRegion,  &extInstRegion,
      &memoryModelRegion,  &entryPointRegion, &executionModeRegion,
      &debugRegion,        &annotationRegion, &globalRegion,
      &functionDeclRegion, &functionRegion,
  };

  bool used = false;
  for (auto it = searchedRegions.begin(); !used && it != searchedRegions.end();
       ++it) {
    used = (*it)->isIdUsed(id);
  }
  return used;
}
SpirvBuilder() = default;
SpirvBuilder(IdGenerator &idGenerator, std::size_t expInstructionsCount)
@ -1696,6 +1778,8 @@ public:
functionRegion.clear();
}
// Returns the stored mIdGenerator pointer as-is (no ownership transfer is
// expressed here; the pointer may be null if the builder was
// default-constructed — NOTE(review): confirm against the constructors).
IdGenerator *getIdGenerator() const { return mIdGenerator; }
std::vector<std::uint32_t> build(std::uint32_t spirvVersion,
std::uint32_t generatorMagic) {
const std::size_t headerSize = 5;

View file

@ -9,6 +9,9 @@ constexpr AccessOp operator|(AccessOp lhs, AccessOp rhs) {
// Bitwise AND of two AccessOp values, computed on their int representation.
constexpr AccessOp operator&(AccessOp lhs, AccessOp rhs) {
  const int lhsBits = static_cast<int>(lhs);
  const int rhsBits = static_cast<int>(rhs);
  return static_cast<AccessOp>(lhsBits & rhsBits);
}
// Bitwise complement of an AccessOp value, computed on its int
// representation; useful together with operator& for clearing bits.
constexpr AccessOp operator~(AccessOp rhs) {
  const int bits = static_cast<int>(rhs);
  return static_cast<AccessOp>(~bits);
}
// In-place OR: merges the bits of `rhs` into `lhs` and returns `lhs` by
// reference so the expression can be chained.
constexpr AccessOp &operator|=(AccessOp &lhs, AccessOp rhs) {
  lhs = lhs | rhs;
  return lhs;
}

View file

@ -12,7 +12,7 @@
namespace amdgpu::shader {
struct Shader {
enum class UniformKind { Buffer, Sampler, Image };
enum class UniformKind { Buffer, Sampler, StorageImage, Image };
struct UniformInfo {
std::uint32_t binding;

View file

@ -231,6 +231,9 @@ public:
// Looks up the type registered for TypeId::Image2D and returns it cast to
// spirv::ImageType.
spirv::ImageType getImage2DType() {
  auto imageType = getType(TypeId::Image2D);
  return spirv::cast<spirv::ImageType>(imageType);
}
// Looks up the type registered for TypeId::StorageImage2D and returns it cast
// to spirv::ImageType.
spirv::ImageType getStorageImage2DType() {
  auto storageImageType = getType(TypeId::StorageImage2D);
  return spirv::cast<spirv::ImageType>(storageImageType);
}
spirv::SampledImageType getSampledImage2DType() {
return spirv::cast<spirv::SampledImageType>(
getType(TypeId::SampledImage2D));

View file

@ -1,4 +1,5 @@
#pragma once
#include "AccessOp.hpp"
#include "RegisterId.hpp"
#include "RegisterState.hpp"
#include "TypeId.hpp"
@ -36,7 +37,8 @@ struct Fragment {
// std::optional<RegisterId> findInput(spirv::Value value);
// Value addInput(RegisterId id, spirv::Type type);
spirv::SamplerValue createSampler(RegisterId base);
spirv::ImageValue createImage(RegisterId base, bool r128); // TODO: params
spirv::ImageValue createImage(RegisterId base, bool r128, bool sampled,
AccessOp access); // TODO: params
Value createCompositeExtract(Value composite, std::uint32_t member);
Value getOperand(RegisterId id, TypeId type,
OperandGetFlags flags = OperandGetFlags::None);

View file

@ -29,6 +29,7 @@ struct TypeId {
ArrayFloat32x16,
Sampler,
Image2D,
StorageImage2D,
SampledImage2D,
Void // should be last

View file

@ -8,12 +8,14 @@ struct UniformBindings {
static constexpr auto kBufferSlots = 16;
static constexpr auto kImageSlots = 16;
static constexpr auto kSamplerSlots = 16;
static constexpr auto kStorageImageSlots = 16;
static constexpr auto kBufferOffset = 0;
static constexpr auto kImageOffset = kBufferOffset + kBufferSlots;
static constexpr auto kSamplerOffset = kImageOffset + kImageSlots;
static constexpr auto kStorageImageOffset = kSamplerOffset + kSamplerSlots;
static constexpr auto kStageSize = kSamplerOffset + kSamplerSlots;
static constexpr auto kStageSize = kStorageImageOffset + kStorageImageSlots;
static constexpr auto kVertexOffset = 0;
static constexpr auto kFragmentOffset = kStageSize;
@ -34,6 +36,14 @@ struct UniformBindings {
return index + getStageOffset(stage) + kImageOffset;
}
// Maps the index-th storage image of `stage` to its binding number:
// index + stage offset + kStorageImageOffset.
// An index outside [0, kStorageImageSlots) is routed to util::unreachable().
static unsigned getStorageImageBinding(Stage stage, unsigned index) {
  const bool inRange = index < kStorageImageSlots;
  if (!inRange) {
    util::unreachable();
  }

  const auto stageBase = getStageOffset(stage);
  return index + stageBase + kStorageImageOffset;
}
static unsigned getSamplerBinding(Stage stage, unsigned index) {
if (index >= kSamplerSlots) {
util::unreachable();

View file

@ -371,6 +371,8 @@ amdgpu::shader::convert(RemoteMemory memory, Stage stage, std::uint64_t entry,
builder.createCapability(spv::Capability::UniformAndStorageBuffer8BitAccess);
builder.createCapability(spv::Capability::UniformAndStorageBuffer16BitAccess);
builder.createCapability(spv::Capability::Int64);
builder.createCapability(spv::Capability::StorageImageWriteWithoutFormat);
builder.createCapability(spv::Capability::StorageImageReadWithoutFormat);
builder.setMemoryModel(spv::AddressingModel::Logical,
spv::MemoryModel::GLSL450);
@ -410,6 +412,7 @@ amdgpu::shader::convert(RemoteMemory memory, Stage stage, std::uint64_t entry,
std::size_t samplerCount = 0;
std::size_t imageCount = 0;
std::size_t storageImageCount = 0;
std::size_t bufferCount = 0;
for (auto &uniform : ctxt.getUniforms()) {
@ -427,6 +430,11 @@ amdgpu::shader::convert(RemoteMemory memory, Stage stage, std::uint64_t entry,
newUniform.binding =
UniformBindings::getSamplerBinding(stage, samplerCount++);
break;
case TypeId::StorageImage2D:
newUniform.kind = Shader::UniformKind::StorageImage;
newUniform.binding =
UniformBindings::getStorageImageBinding(stage, storageImageCount++);
break;
case TypeId::Image2D:
newUniform.kind = Shader::UniformKind::Image;
newUniform.binding =
@ -478,6 +486,14 @@ amdgpu::shader::convert(RemoteMemory memory, Stage stage, std::uint64_t entry,
{{dimX, dimY, dimZ}});
}
result.spirv = ctxt.getBuilder().build(SPV_VERSION, 0);
// auto maxId = ctxt.getBuilder().getIdGenerator()->bounds;
// for (std::size_t i = 1; i < maxId; ++i) {
// spirv::Id id;
// id.id = i;
// if (builder.isIdDefined(id) && !builder.isIdUsed(id)) {
// std::printf("ssa variable %%%zu defined, but not used\n", i);
// }
// }
result.spirv = builder.build(SPV_VERSION, 0);
return result;
}

View file

@ -136,6 +136,10 @@ spirv::Type ConverterContext::getType(TypeId id) {
return ((type = getBuilder().createTypeImage(getFloat32Type(),
spv::Dim::Dim2D, 0, 0, 0, 1,
spv::ImageFormat::Unknown)));
case TypeId::StorageImage2D:
return ((type = getBuilder().createTypeImage(getFloat32Type(),
spv::Dim::Dim2D, 0, 0, 0, 2,
spv::ImageFormat::Unknown)));
case TypeId::SampledImage2D:
return ((type = getBuilder().createTypeSampledImage(getImage2DType())));

View file

@ -81,7 +81,7 @@ std::uint32_t sizeOfFormat(SurfaceFormat format) {
case kSurfaceFormat32_32_32_32:
return 128;
default:
util::unreachable();
util::unreachable("unsupported format %u", format);
}
}
@ -425,21 +425,79 @@ void convertToFormat(RegisterId sourceRegister, int count, Fragment &fragment,
uniformPointerType, uniform->variable,
{{fragment.context->getUInt32(0), channelOffset}});
spirv::Value channelValue;
switch (channelType) {
case kTextureChannelTypeFloat:
case kTextureChannelTypeSInt:
case kTextureChannelTypeUInt:
fragment.builder.createStore(
uniformPointerValue,
case kTextureChannelTypeUNorm: {
channelValue =
fragment
.getOperand(RegisterId::Raw(sourceRegister + channel),
storeType)
.value);
TypeId::Float32)
.value;
auto maxValue =
(static_cast<std::uint64_t>(1) << (channelSize * 8)) - 1;
channelValue =
builder.createFMul(fragment.context->getFloat32Type(),
spirv::cast<spirv::FloatValue>(channelValue),
fragment.context->getFloat32(maxValue));
channelValue = builder.createConvertFToU(
fragment.context->getType(TypeId::UInt32), channelValue);
if (storeType != TypeId::UInt32) {
channelValue = builder.createUConvert(
fragment.context->getType(storeType),
spirv::cast<spirv::UIntValue>(channelValue));
}
break;
}
case kTextureChannelTypeFloat:
channelValue =
fragment
.getOperand(RegisterId::Raw(sourceRegister + channel),
TypeId::Float32)
.value;
if (storeType != TypeId::Float32) {
channelValue = fragment.builder.createFConvert(
fragment.context->getType(storeType), channelValue);
}
break;
case kTextureChannelTypeSInt:
channelValue =
fragment
.getOperand(RegisterId::Raw(sourceRegister + channel),
TypeId::SInt32)
.value;
if (storeType != TypeId::SInt32) {
channelValue = fragment.builder.createSConvert(
fragment.context->getType(storeType),
spirv::cast<spirv::SIntValue>(channelValue));
}
break;
case kTextureChannelTypeUInt:
channelValue =
fragment
.getOperand(RegisterId::Raw(sourceRegister + channel),
TypeId::UInt32)
.value;
if (storeType != TypeId::UInt32) {
channelValue = fragment.builder.createUConvert(
fragment.context->getType(storeType),
spirv::cast<spirv::UIntValue>(channelValue));
}
break;
default:
util::unreachable("unimplemented channel type %u", channelType);
}
fragment.builder.createStore(uniformPointerValue, channelValue);
}
for (; channel < count; ++channel) {
@ -1163,16 +1221,42 @@ void convertVop2(Fragment &fragment, Vop2 inst) {
}
void convertSop2(Fragment &fragment, Sop2 inst) {
fragment.registers->pc += Sop2::kMinInstSize * sizeof(std::uint32_t);
auto &builder = fragment.builder;
auto context = fragment.context;
auto sCarry = [&](spirv::SIntValue a, spirv::SIntValue b,
spirv::SIntValue result) {
auto boolT = context->getBoolType();
auto uint32T = context->getUInt32Type();
auto s0 = context->getSInt32(0);
auto u1 = context->getUInt32(1);
auto u0 = context->getUInt32(0);
auto aLtZero = builder.createSelect(
uint32T, builder.createSLessThan(boolT, a, s0), u1, u0);
auto bLtZero = builder.createSelect(
uint32T, builder.createSLessThan(boolT, b, s0), u1, u0);
auto resultLtZero = builder.createSelect(
uint32T, builder.createSLessThan(boolT, result, s0), u1, u0);
auto argsSignEq = builder.createIEqual(boolT, aLtZero, bLtZero);
auto resSignNe = builder.createINotEqual(boolT, resultLtZero, aLtZero);
return Value{boolT, builder.createLogicalAnd(boolT, argsSignEq, resSignNe)};
};
switch (inst.op) {
case Sop2::Op::S_ADD_U32: {
auto src0 = spirv::cast<spirv::UIntValue>(
fragment.getScalarOperand(inst.ssrc0, TypeId::UInt32).value);
auto src1 = spirv::cast<spirv::UIntValue>(
fragment.getScalarOperand(inst.ssrc1, TypeId::UInt32).value);
auto resultT = fragment.context->getUInt32Type();
auto result = fragment.builder.createIAdd(resultT, src0, src1);
fragment.setScc({resultT, result});
fragment.setScalarOperand(inst.sdst, {resultT, result});
auto src0 = fragment.getScalarOperand(inst.ssrc0, TypeId::UInt32).value;
auto src1 = fragment.getScalarOperand(inst.ssrc1, TypeId::UInt32).value;
auto uintT = fragment.context->getType(TypeId::UInt32);
auto resultStruct =
fragment.context->getStructType(std::array{uintT, uintT});
auto result = fragment.builder.createIAddCarry(resultStruct, src0, src1);
fragment.setScalarOperand(
inst.sdst,
{uintT, fragment.builder.createCompositeExtract(
uintT, result, {{static_cast<std::uint32_t>(0)}})});
fragment.setScc(
{uintT, fragment.builder.createCompositeExtract(
uintT, result, {{static_cast<std::uint32_t>(1)}})});
break;
}
case Sop2::Op::S_ADD_I32: {
@ -1182,7 +1266,35 @@ void convertSop2(Fragment &fragment, Sop2 inst) {
fragment.getScalarOperand(inst.ssrc1, TypeId::SInt32).value);
auto resultT = fragment.context->getSint32Type();
auto result = fragment.builder.createIAdd(resultT, src0, src1);
fragment.setScc({resultT, result});
fragment.setScc(sCarry(src0, src1, result));
fragment.setScalarOperand(inst.sdst, {resultT, result});
break;
}
case Sop2::Op::S_SUB_U32: {
auto src0 = fragment.getScalarOperand(inst.ssrc0, TypeId::UInt32).value;
auto src1 = fragment.getScalarOperand(inst.ssrc1, TypeId::UInt32).value;
auto uintT = fragment.context->getType(TypeId::UInt32);
auto resultStruct =
fragment.context->getStructType(std::array{uintT, uintT});
auto result = fragment.builder.createISubBorrow(resultStruct, src0, src1);
fragment.setScalarOperand(
inst.sdst,
{uintT, fragment.builder.createCompositeExtract(
uintT, result, {{static_cast<std::uint32_t>(0)}})});
fragment.setScc(
{uintT, fragment.builder.createCompositeExtract(
uintT, result, {{static_cast<std::uint32_t>(1)}})});
break;
}
case Sop2::Op::S_SUB_I32: {
auto src0 = spirv::cast<spirv::SIntValue>(
fragment.getScalarOperand(inst.ssrc0, TypeId::SInt32).value);
auto src1 = spirv::cast<spirv::SIntValue>(
fragment.getScalarOperand(inst.ssrc1, TypeId::SInt32).value);
auto resultT = fragment.context->getSint32Type();
auto result = fragment.builder.createISub(resultT, src0, src1);
fragment.setScc(sCarry(src0, src1, result));
fragment.setScalarOperand(inst.sdst, {resultT, result});
break;
}
@ -1685,6 +1797,12 @@ void convertVop3(Fragment &fragment, Vop3 inst) {
return result;
};
auto roundEven = [&](spirv::Type type, spirv::Value value) {
auto glslStd450 = fragment.context->getGlslStd450();
return Value{type, fragment.builder.createExtInst(
type, glslStd450, GLSLstd450RoundEven, {{value}})};
};
auto cmpOp = [&](TypeId type, CmpKind kind, CmpFlags flags = CmpFlags::None) {
auto src0 = fragment.getScalarOperand(inst.src0, type).value;
auto src1 = fragment.getScalarOperand(inst.src1, type).value;
@ -2456,7 +2574,7 @@ void convertVop3(Fragment &fragment, Vop3 inst) {
floatT, float1, spirv::cast<spirv::FloatValue>(src.value));
auto result = applyClamp(applyOmod({floatT, resultValue}));
fragment.setVectorOperand(inst.vdst, result);
fragment.setVectorOperand(inst.vdst, roundEven(result.type, result.value));
break;
}
@ -2475,7 +2593,8 @@ void convertVop3(Fragment &fragment, Vop3 inst) {
inst.sdst,
{uintT, fragment.builder.createCompositeExtract(
uintT, result, std::array{static_cast<std::uint32_t>(1)})});
// TODO: update sdst + 1
fragment.setScalarOperand(inst.sdst + 1,
{uintT, fragment.context->getUInt32(0)});
break;
}
@ -2524,6 +2643,18 @@ void convertVop3(Fragment &fragment, Vop3 inst) {
fragment.setVectorOperand(inst.vdst, result);
break;
}
case Vop3::Op::V3_MUL_LO_U32: {
auto resultT = fragment.context->getUInt32Type();
auto src0 = getSrc(0, TypeId::UInt32);
auto src1 = getSrc(1, TypeId::UInt32);
auto resultValue = fragment.builder.createIMul(
resultT, spirv::cast<spirv::UIntValue>(src0.value),
spirv::cast<spirv::UIntValue>(src1.value));
auto result = applyClamp(applyOmod({resultT, resultValue}));
fragment.setVectorOperand(inst.vdst, result);
break;
}
case Vop3::Op::V3_MUL_LO_I32: {
auto resultT = fragment.context->getSint32Type();
auto src0 = getSrc(0, TypeId::SInt32);
@ -3106,15 +3237,23 @@ void convertMubuf(Fragment &fragment, Mubuf inst) {
static_cast<int>(Mubuf::Op::BUFFER_LOAD_FORMAT_X) + 1;
auto vbuffer = getVBuffer();
auto address = getAddress(&vbuffer);
if (vbuffer.dfmt != kSurfaceFormatInvalid) {
auto address = getAddress(&vbuffer);
spirv::Value result[4];
auto resultType = convertFromFormat(
result, count, fragment, reinterpret_cast<std::uint32_t *>(&vbuffer),
address, vbuffer.dfmt, vbuffer.nfmt);
spirv::Value result[4];
auto resultType = convertFromFormat(
result, count, fragment, reinterpret_cast<std::uint32_t *>(&vbuffer),
address, vbuffer.dfmt, vbuffer.nfmt);
for (std::uint32_t i = 0; i < count; ++i) {
fragment.setVectorOperand(inst.vdata + i, {resultType, result[i]});
for (std::uint32_t i = 0; i < count; ++i) {
fragment.setVectorOperand(inst.vdata + i, {resultType, result[i]});
}
} else {
auto floatT = fragment.context->getFloat32Type();
auto zero = fragment.context->getFloat32(0);
for (std::uint32_t i = 0; i < count; ++i) {
fragment.setVectorOperand(inst.vdata + i, {floatT, zero});
}
}
break;
}
@ -3128,11 +3267,13 @@ void convertMubuf(Fragment &fragment, Mubuf inst) {
1;
auto vbuffer = getVBuffer();
auto address = getAddress(&vbuffer);
if (vbuffer.dfmt != kSurfaceFormatInvalid) {
auto address = getAddress(&vbuffer);
convertToFormat(RegisterId::Vector(inst.vdata), count, fragment,
reinterpret_cast<std::uint32_t *>(&vbuffer), address,
vbuffer.dfmt, vbuffer.nfmt);
convertToFormat(RegisterId::Vector(inst.vdata), count, fragment,
reinterpret_cast<std::uint32_t *>(&vbuffer), address,
vbuffer.dfmt, vbuffer.nfmt);
}
break;
}
@ -3196,7 +3337,6 @@ void convertMubuf(Fragment &fragment, Mubuf inst) {
auto vbuffer = getVBuffer();
auto address = getAddress(&vbuffer);
auto storeType = fragment.context->getType(TypeId::UInt32);
auto uniform = fragment.context->getOrCreateStorageBuffer(
reinterpret_cast<std::uint32_t *>(&vbuffer), TypeId::UInt32);
uniform->accessOp |= AccessOp::Store;
@ -3365,9 +3505,15 @@ void convertMtbuf(Fragment &fragment, Mtbuf inst) {
auto indexMsb = builder.createIMul(
uint32T, index_msb, fragment.context->getUInt32(vbuffer->stride));
auto indexLsb = builder.createIMul(
uint32T, index_lsb,
fragment.context->getUInt32(vbuffer->element_size));
address = builder.createIAdd(
uint32T, address,
builder.createIMul(uint32T, indexMsb, indexStride));
address = builder.createIAdd(uint32T, address, indexLsb);
} else if (offset) {
auto indexStride = fragment.context->getUInt32(vbuffer->index_stride);
auto elementSize = fragment.context->getUInt32(vbuffer->element_size);
@ -3527,9 +3673,15 @@ void convertMtbuf(Fragment &fragment, Mtbuf inst) {
auto indexMsb = builder.createIMul(
uint32T, index_msb, fragment.context->getUInt32(vbuffer->stride));
auto indexLsb = builder.createIMul(
uint32T, index_lsb,
fragment.context->getUInt32(vbuffer->element_size));
address = builder.createIAdd(
uint32T, address,
builder.createIMul(uint32T, indexMsb, indexStride));
address = builder.createIAdd(uint32T, address, indexLsb);
} else if (offset) {
auto indexStride = fragment.context->getUInt32(vbuffer->index_stride);
auto elementSize = fragment.context->getUInt32(vbuffer->element_size);
@ -3565,8 +3717,9 @@ void convertMimg(Fragment &fragment, Mimg inst) {
fragment.registers->pc += Mimg::kMinInstSize * sizeof(std::uint32_t);
switch (inst.op) {
case Mimg::Op::IMAGE_GET_RESINFO: {
auto image =
fragment.createImage(RegisterId::Raw(inst.srsrc << 2), inst.r128);
auto image = fragment.createImage(RegisterId::Raw(inst.srsrc << 2),
inst.r128, true, // fixme, should be any
AccessOp::None);
spirv::Value values[4];
auto uint32T = fragment.context->getUInt32Type();
@ -3600,8 +3753,8 @@ void convertMimg(Fragment &fragment, Mimg inst) {
}
case Mimg::Op::IMAGE_SAMPLE_LZ: {
auto image =
fragment.createImage(RegisterId::Raw(inst.srsrc << 2), inst.r128);
auto image = fragment.createImage(RegisterId::Raw(inst.srsrc << 2),
inst.r128, true, AccessOp::Load);
auto sampler = fragment.createSampler(RegisterId::Raw(inst.ssamp << 2));
auto coord0 = fragment.getVectorOperand(inst.vaddr, TypeId::Float32).value;
auto coord1 =
@ -3633,8 +3786,8 @@ void convertMimg(Fragment &fragment, Mimg inst) {
}
case Mimg::Op::IMAGE_SAMPLE: {
auto image =
fragment.createImage(RegisterId::Raw(inst.srsrc << 2), inst.r128);
auto image = fragment.createImage(RegisterId::Raw(inst.srsrc << 2),
inst.r128, true, AccessOp::Load);
auto sampler = fragment.createSampler(RegisterId::Raw(inst.ssamp << 2));
auto coord0 = fragment.getVectorOperand(inst.vaddr, TypeId::Float32).value;
auto coord1 =
@ -3664,6 +3817,66 @@ void convertMimg(Fragment &fragment, Mimg inst) {
break;
}
case Mimg::Op::IMAGE_STORE:
case Mimg::Op::IMAGE_STORE_MIP: {
auto image = fragment.createImage(RegisterId::Raw(inst.srsrc << 2),
inst.r128, false, AccessOp::Store);
auto coord0 = fragment.getVectorOperand(inst.vaddr, TypeId::UInt32).value;
auto coord1 =
fragment.getVectorOperand(inst.vaddr + 1, TypeId::UInt32).value;
auto coord2 =
fragment.getVectorOperand(inst.vaddr + 2, TypeId::UInt32).value;
auto coords = fragment.builder.createCompositeConstruct(
fragment.context->getUint32x3Type(),
{{coord0, coord1, coord2}}); // TODO
auto float4T = fragment.context->getFloat32x4Type();
spirv::Value values[4];
for (std::uint32_t dstOffset = 0, i = 0; i < 4; ++i) {
if (inst.dmask & (1 << i)) {
values[i] =
fragment.getVectorOperand(inst.vdata + dstOffset++, TypeId::Float32)
.value;
} else {
values[i] = fragment.context->getFloat32(0);
}
}
auto value = fragment.builder.createCompositeConstruct(float4T, values);
fragment.builder.createImageWrite(image, coords, value);
break;
}
case Mimg::Op::IMAGE_LOAD:
case Mimg::Op::IMAGE_LOAD_MIP: {
auto image = fragment.createImage(RegisterId::Raw(inst.srsrc << 2),
inst.r128, false, AccessOp::Load);
auto coord0 = fragment.getVectorOperand(inst.vaddr, TypeId::UInt32).value;
auto coord1 =
fragment.getVectorOperand(inst.vaddr + 1, TypeId::UInt32).value;
auto coord2 =
fragment.getVectorOperand(inst.vaddr + 2, TypeId::UInt32).value;
auto coords = fragment.builder.createCompositeConstruct(
fragment.context->getUint32x3Type(),
{{coord0, coord1, coord2}}); // TODO
auto float4T = fragment.context->getFloat32x4Type();
auto floatT = fragment.context->getFloat32Type();
auto value = fragment.builder.createImageRead(float4T, image, coords);
for (std::uint32_t dstOffset = 0, i = 0; i < 4; ++i) {
if (inst.dmask & (1 << i)) {
fragment.setVectorOperand(
inst.vdata + dstOffset++,
{floatT,
fragment.builder.createCompositeExtract(floatT, value, {{i}})});
}
}
break;
}
case Mimg::Op::IMAGE_GET_LOD: {
auto intT = fragment.context->getUInt32Type();
for (std::uint32_t dstOffset = 0, i = 0; i < 4; ++i) {
@ -3799,6 +4012,14 @@ void convertExp(Fragment &fragment, Exp inst) {
void convertVop1(Fragment &fragment, Vop1 inst) {
fragment.registers->pc += Vop1::kMinInstSize * sizeof(std::uint32_t);
auto roundEven = [&](spirv::Type type, spirv::Value value) {
// auto glslStd450 = fragment.context->getGlslStd450();
// return Value{type, fragment.builder.createExtInst(
// type, glslStd450, GLSLstd450RoundEven,
// {{value}})};
return Value{type, value};
};
switch (inst.op) {
case Vop1::Op::V_MOV_B32:
fragment.setVectorOperand(
@ -3806,6 +4027,22 @@ void convertVop1(Fragment &fragment, Vop1 inst) {
OperandGetFlags::PreserveType));
break;
case Vop1::Op::V_RCP_IFLAG_F32: {
auto src = spirv::cast<spirv::FloatValue>(
fragment.getScalarOperand(inst.src0, TypeId::Float32).value);
auto floatT = fragment.context->getFloat32Type();
auto isNotZero = fragment.builder.createFOrdNotEqual(
fragment.context->getBoolType(), src, fragment.context->getFloat32(0));
src = fragment.builder.createSelect(
floatT, isNotZero, src, fragment.context->getFloat32(0.0000001));
auto float1 = fragment.context->getFloat32(1);
auto result = fragment.builder.createFDiv(floatT, float1, src);
fragment.setVectorOperand(inst.vdst, roundEven(floatT, result));
break;
}
case Vop1::Op::V_RCP_F32: {
auto src = spirv::cast<spirv::FloatValue>(
fragment.getScalarOperand(inst.src0, TypeId::Float32).value);
@ -3813,7 +4050,7 @@ void convertVop1(Fragment &fragment, Vop1 inst) {
auto float1 = fragment.context->getFloat32(1);
auto result = fragment.builder.createFDiv(floatT, float1, src);
fragment.setVectorOperand(inst.vdst, {floatT, result});
fragment.setVectorOperand(inst.vdst, roundEven(floatT, result));
break;
}
@ -3870,7 +4107,7 @@ void convertVop1(Fragment &fragment, Vop1 inst) {
auto result = fragment.builder.createExtInst(floatT, glslStd450,
GLSLstd450Exp2, {{src}});
fragment.setVectorOperand(inst.vdst, {floatT, result});
fragment.setVectorOperand(inst.vdst, roundEven(floatT, result));
break;
}
@ -3946,7 +4183,7 @@ void convertVop1(Fragment &fragment, Vop1 inst) {
auto result = fragment.builder.createExtInst(floatT, glslStd450,
GLSLstd450Sin, {{src}});
fragment.setVectorOperand(inst.vdst, {floatT, result});
fragment.setVectorOperand(inst.vdst, roundEven(floatT, result));
break;
}
case Vop1::Op::V_COS_F32: {
@ -3960,7 +4197,7 @@ void convertVop1(Fragment &fragment, Vop1 inst) {
auto result = fragment.builder.createExtInst(floatT, glslStd450,
GLSLstd450Cos, {{src}});
fragment.setVectorOperand(inst.vdst, {floatT, result});
fragment.setVectorOperand(inst.vdst, roundEven(floatT, result));
break;
}
@ -5253,7 +5490,8 @@ spirv::SamplerValue Fragment::createSampler(RegisterId base) {
}
}
spirv::ImageValue Fragment::createImage(RegisterId base, bool r128) {
spirv::ImageValue Fragment::createImage(RegisterId base, bool r128,
bool sampled, AccessOp access) {
auto tBuffer0 = getOperand(RegisterId::Raw(base + 0), TypeId::UInt32);
auto tBuffer1 = getOperand(RegisterId::Raw(base + 1), TypeId::UInt32);
auto tBuffer2 = getOperand(RegisterId::Raw(base + 2), TypeId::UInt32);
@ -5269,6 +5507,10 @@ spirv::ImageValue Fragment::createImage(RegisterId base, bool r128) {
util::unreachable();
}
auto imageTypeId = sampled ? TypeId::Image2D : TypeId::StorageImage2D;
auto imageType =
sampled ? context->getImage2DType() : context->getStorageImage2DType();
if (r128) {
std::uint32_t sbuffer[] = {
*optTBuffer0Value,
@ -5278,8 +5520,9 @@ spirv::ImageValue Fragment::createImage(RegisterId base, bool r128) {
};
auto uniform = context->getOrCreateUniformConstant(
sbuffer, std::size(sbuffer), TypeId::Image2D);
return builder.createLoad(context->getImage2DType(), uniform->variable);
sbuffer, std::size(sbuffer), imageTypeId);
uniform->accessOp |= access;
return builder.createLoad(imageType, uniform->variable);
}
auto tBuffer4 = getOperand(RegisterId::Raw(base + 4), TypeId::UInt32);
@ -5304,8 +5547,9 @@ spirv::ImageValue Fragment::createImage(RegisterId base, bool r128) {
};
auto uniform = context->getOrCreateUniformConstant(
sbuffer, std::size(sbuffer), TypeId::Image2D);
return builder.createLoad(context->getImage2DType(), uniform->variable);
sbuffer, std::size(sbuffer), imageTypeId);
uniform->accessOp |= access;
return builder.createLoad(imageType, uniform->variable);
}
Value Fragment::createCompositeExtract(Value composite, std::uint32_t member) {
@ -5697,15 +5941,15 @@ void amdgpu::shader::Fragment::convert(std::uint64_t size) {
while (ptr < endptr) {
Instruction inst(ptr);
// auto startPoint = builder.bodyRegion.getCurrentPosition();
auto startPoint = builder.bodyRegion.getCurrentPosition();
// std::printf("===============\n");
// inst.dump();
// std::printf("\n");
std::printf("===============\n");
inst.dump();
std::printf("\n");
convertInstruction(*this, inst);
// std::printf("-------------->\n");
// spirv::dump(builder.bodyRegion.getCurrentPosition() - startPoint);
std::printf("-------------->\n");
spirv::dump(builder.bodyRegion.getCurrentPosition() - startPoint);
ptr += inst.size();
}

View file

@ -1,6 +1,5 @@
#include "TypeId.hpp"
#include "util/unreachable.hpp"
#include <cstdint>
amdgpu::shader::TypeId amdgpu::shader::TypeId::getBaseType() const {
switch (raw) {
@ -19,6 +18,7 @@ amdgpu::shader::TypeId amdgpu::shader::TypeId::getBaseType() const {
case TypeId::Float64:
case TypeId::Sampler:
case TypeId::Image2D:
case TypeId::StorageImage2D:
case TypeId::SampledImage2D:
return raw;
@ -44,6 +44,7 @@ std::size_t amdgpu::shader::TypeId::getSize() const {
switch (raw) {
case TypeId::Void:
case TypeId::Sampler:
case TypeId::StorageImage2D:
case TypeId::Image2D:
case TypeId::SampledImage2D:
return 0;
@ -124,6 +125,7 @@ std::size_t amdgpu::shader::TypeId::getElementsCount() const {
case TypeId::Void:
case TypeId::Sampler:
case TypeId::Image2D:
case TypeId::StorageImage2D:
case TypeId::SampledImage2D:
return 0;
}

View file

@ -1,6 +1,6 @@
#include "cf.hpp"
#include <cassert>
#include <fstream>
#include <cstdlib>
#include <unordered_set>
void cf::BasicBlock::split(BasicBlock *target) {

View file

@ -1,8 +1,5 @@
#include "scf.hpp"
#include "cf.hpp"
#include <cassert>
#include <fstream>
#include <unordered_set>
#include <utility>
void scf::Block::eraseFrom(Node *endBefore) {