mirror of
https://github.com/RPCSX/rpcsx.git
synced 2026-01-20 15:40:30 +01:00
[amdgpu] shader: implement storage images
Random instructions fixes
This commit is contained in:
parent
72c7940259
commit
46afeb987e
|
|
@ -319,6 +319,9 @@ public:
|
|||
Region() = default;
|
||||
/// Constructs a region and reserves storage in mData up front.
/// @param expInstCount capacity hint handed to mData.reserve().
Region(std::size_t expInstCount) {
  mData.reserve(expInstCount);
}
|
||||
|
||||
/// Returns true when the numeric value of `id` is present in mIdDefs.
bool isIdDefined(Id id) const {
  return mIdDefs.contains(id.id);
}
|
||||
/// Returns true when the numeric value of `id` is present in mIdUses.
bool isIdUsed(Id id) const {
  return mIdUses.contains(id.id);
}
|
||||
|
||||
/// Drops every word stored in mData.
// NOTE(review): only mData is reset here; mIdDefs / mIdUses are left
// untouched, so isIdDefined()/isIdUsed() may keep reporting ids recorded
// before the clear — confirm this is intended.
void clear() {
  mData.clear();
}
|
||||
|
||||
/// Read-only pointer to the first word held in mData.
const std::uint32_t *data() const {
  return mData.data();
}
|
||||
|
|
@ -409,8 +412,8 @@ public:
|
|||
BlockBuilder() = default;
|
||||
BlockBuilder(IdGenerator &idGenerator, Block id,
|
||||
std::size_t expInstructionsCount)
|
||||
: mIdGenerator(&idGenerator), bodyRegion{expInstructionsCount},
|
||||
terminatorRegion{1}, id(id) {}
|
||||
: mIdGenerator(&idGenerator), id(id), bodyRegion{expInstructionsCount},
|
||||
terminatorRegion{1} {}
|
||||
|
||||
void moveBlock(BlockBuilder &&other) {
|
||||
prefix.pushRegion(other.prefix);
|
||||
|
|
@ -1534,6 +1537,53 @@ public:
|
|||
return id;
|
||||
}
|
||||
|
||||
/// Appends an OpImageRead instruction to the body region.
/// @param resultType type id of the float vector produced by the read.
/// @param image      image to read from.
/// @param coords     unsigned integer coordinate (scalar or vector).
/// @param operands   optional image-operands mask; MaskNone emits no mask word.
/// @param args       ids following the mask word; ignored when `operands`
///                   is MaskNone.
/// @return the freshly allocated result id.
VectorOfValue<FloatType> createImageRead(
    VectorOfType<FloatType> resultType, ImageValue image,
    ScalarOrVectorOfValue<UIntType> coords,
    spv::ImageOperandsMask operands = spv::ImageOperandsMask::MaskNone,
    std::span<const Id> args = {}) {
  const bool hasOperands = operands != spv::ImageOperandsMask::MaskNone;

  // Five words are always requested for the instruction; an explicit mask
  // adds one word for itself plus one per trailing argument.
  auto region = bodyRegion.pushOp(spv::Op::OpImageRead,
                                  5 + (hasOperands ? 1 + args.size() : 0));
  auto id = newId<VectorOfValue<FloatType>>();

  region.pushIdUse(resultType);
  region.pushIdDef(id);
  region.pushIdUse(image);
  region.pushIdUse(coords);

  if (!hasOperands) {
    return id;
  }

  region.pushWord(static_cast<unsigned>(operands));
  for (Id operandArg : args) {
    region.pushIdUse(operandArg);
  }

  return id;
}
|
||||
|
||||
/// Appends an OpImageWrite instruction to the body region.
/// @param image    image to write to.
/// @param coords   unsigned integer coordinate (scalar or vector).
/// @param texel    value to store.
/// @param operands optional image-operands mask; MaskNone emits no mask word.
/// @param args     ids following the mask word; ignored when `operands`
///                 is MaskNone.
void createImageWrite(
    ImageValue image, ScalarOrVectorOfValue<UIntType> coords, Value texel,
    spv::ImageOperandsMask operands = spv::ImageOperandsMask::MaskNone,
    std::span<const Id> args = {}) {
  const bool hasOperands = operands != spv::ImageOperandsMask::MaskNone;

  // Four words are always requested for the instruction; an explicit mask
  // adds one word for itself plus one per trailing argument.
  auto region = bodyRegion.pushOp(spv::Op::OpImageWrite,
                                  4 + (hasOperands ? 1 + args.size() : 0));

  region.pushIdUse(image);
  region.pushIdUse(coords);
  region.pushIdUse(texel);

  if (!hasOperands) {
    return;
  }

  region.pushWord(static_cast<unsigned>(operands));
  for (Id operandArg : args) {
    region.pushIdUse(operandArg);
  }
}
|
||||
|
||||
Value createImageQuerySizeLod(Type resultType, ImageValue image, Value lod) {
|
||||
auto region = bodyRegion.pushOp(spv::Op::OpImageQuerySizeLod, 5);
|
||||
auto id = newId<Value>();
|
||||
|
|
@ -1655,6 +1705,38 @@ private:
|
|||
SpirvBuilder &operator=(SpirvBuilder &&) = default;
|
||||
|
||||
public:
|
||||
/// Returns true when `id` is reported as defined by the function region.
bool isIdDefined(Id id) const {
  // Only functionRegion takes part in the lookup. The other regions are
  // deliberately left out for now:
  // &capabilityRegion, &extensionRegion, &extInstRegion,
  // &memoryModelRegion, &entryPointRegion, &executionModeRegion,
  // &debugRegion, &annotationRegion, &globalRegion,
  return functionRegion.isIdDefined(id);
}
|
||||
/// Returns true when any of the builder's regions listed below reports a use
/// of `id`. Regions are probed in list order and the scan stops at the first
/// hit.
bool isIdUsed(Id id) const {
  const std::array searchedRegions = {
      &capabilityRegion,   &extensionRegion,  &extInstRegion,
      &memoryModelRegion,  &entryPointRegion, &executionModeRegion,
      &debugRegion,        &annotationRegion, &globalRegion,
      &functionDeclRegion, &functionRegion,
  };

  std::size_t index = 0;
  while (index < searchedRegions.size() &&
         !searchedRegions[index]->isIdUsed(id)) {
    ++index;
  }

  // Stopping before the end means some region reported the id as used.
  return index != searchedRegions.size();
}
|
||||
SpirvBuilder() = default;
|
||||
|
||||
SpirvBuilder(IdGenerator &idGenerator, std::size_t expInstructionsCount)
|
||||
|
|
@ -1696,6 +1778,8 @@ public:
|
|||
functionRegion.clear();
|
||||
}
|
||||
|
||||
/// Returns the id generator pointer stored in mIdGenerator.
IdGenerator *getIdGenerator() const {
  return mIdGenerator;
}
|
||||
|
||||
std::vector<std::uint32_t> build(std::uint32_t spirvVersion,
|
||||
std::uint32_t generatorMagic) {
|
||||
const std::size_t headerSize = 5;
|
||||
|
|
|
|||
|
|
@ -9,6 +9,9 @@ constexpr AccessOp operator|(AccessOp lhs, AccessOp rhs) {
|
|||
/// Bitwise AND of two AccessOp values, computed on their int representation.
constexpr AccessOp operator&(AccessOp lhs, AccessOp rhs) {
  const int left = static_cast<int>(lhs);
  const int right = static_cast<int>(rhs);
  return static_cast<AccessOp>(left & right);
}
|
||||
/// Bitwise complement of an AccessOp value, computed on its int
/// representation.
constexpr AccessOp operator~(AccessOp rhs) {
  const int bits = static_cast<int>(rhs);
  return static_cast<AccessOp>(~bits);
}
|
||||
/// ORs `rhs` into `lhs` in place and returns a reference to `lhs`.
constexpr AccessOp &operator|=(AccessOp &lhs, AccessOp rhs) {
  lhs = lhs | rhs;
  return lhs;
}
|
||||
|
|
|
|||
|
|
@ -12,7 +12,7 @@
|
|||
|
||||
namespace amdgpu::shader {
|
||||
struct Shader {
|
||||
enum class UniformKind { Buffer, Sampler, Image };
|
||||
enum class UniformKind { Buffer, Sampler, StorageImage, Image };
|
||||
|
||||
struct UniformInfo {
|
||||
std::uint32_t binding;
|
||||
|
|
|
|||
|
|
@ -231,6 +231,9 @@ public:
|
|||
/// Fetches the type registered for TypeId::Image2D and casts it to
/// spirv::ImageType.
spirv::ImageType getImage2DType() {
  auto imageType = getType(TypeId::Image2D);
  return spirv::cast<spirv::ImageType>(imageType);
}
|
||||
/// Fetches the type registered for TypeId::StorageImage2D and casts it to
/// spirv::ImageType.
spirv::ImageType getStorageImage2DType() {
  auto storageImageType = getType(TypeId::StorageImage2D);
  return spirv::cast<spirv::ImageType>(storageImageType);
}
|
||||
spirv::SampledImageType getSampledImage2DType() {
|
||||
return spirv::cast<spirv::SampledImageType>(
|
||||
getType(TypeId::SampledImage2D));
|
||||
|
|
|
|||
|
|
@ -1,4 +1,5 @@
|
|||
#pragma once
|
||||
#include "AccessOp.hpp"
|
||||
#include "RegisterId.hpp"
|
||||
#include "RegisterState.hpp"
|
||||
#include "TypeId.hpp"
|
||||
|
|
@ -36,7 +37,8 @@ struct Fragment {
|
|||
// std::optional<RegisterId> findInput(spirv::Value value);
|
||||
// Value addInput(RegisterId id, spirv::Type type);
|
||||
spirv::SamplerValue createSampler(RegisterId base);
|
||||
spirv::ImageValue createImage(RegisterId base, bool r128); // TODO: params
|
||||
spirv::ImageValue createImage(RegisterId base, bool r128, bool sampled,
|
||||
AccessOp access); // TODO: params
|
||||
Value createCompositeExtract(Value composite, std::uint32_t member);
|
||||
Value getOperand(RegisterId id, TypeId type,
|
||||
OperandGetFlags flags = OperandGetFlags::None);
|
||||
|
|
|
|||
|
|
@ -29,6 +29,7 @@ struct TypeId {
|
|||
ArrayFloat32x16,
|
||||
Sampler,
|
||||
Image2D,
|
||||
StorageImage2D,
|
||||
SampledImage2D,
|
||||
|
||||
Void // should be last
|
||||
|
|
|
|||
|
|
@ -8,12 +8,14 @@ struct UniformBindings {
|
|||
static constexpr auto kBufferSlots = 16;
|
||||
static constexpr auto kImageSlots = 16;
|
||||
static constexpr auto kSamplerSlots = 16;
|
||||
static constexpr auto kStorageImageSlots = 16;
|
||||
|
||||
static constexpr auto kBufferOffset = 0;
|
||||
static constexpr auto kImageOffset = kBufferOffset + kBufferSlots;
|
||||
static constexpr auto kSamplerOffset = kImageOffset + kImageSlots;
|
||||
static constexpr auto kStorageImageOffset = kSamplerOffset + kSamplerSlots;
|
||||
|
||||
static constexpr auto kStageSize = kSamplerOffset + kSamplerSlots;
|
||||
static constexpr auto kStageSize = kStorageImageOffset + kStorageImageSlots;
|
||||
|
||||
static constexpr auto kVertexOffset = 0;
|
||||
static constexpr auto kFragmentOffset = kStageSize;
|
||||
|
|
@ -34,6 +36,14 @@ struct UniformBindings {
|
|||
return index + getStageOffset(stage) + kImageOffset;
|
||||
}
|
||||
|
||||
/// Maps a per-stage storage-image slot to its binding number.
/// @param stage stage whose offset (from getStageOffset) is added.
/// @param index slot within the stage; must be below kStorageImageSlots,
///              otherwise util::unreachable() is invoked.
/// @return index + stage offset + kStorageImageOffset.
static unsigned getStorageImageBinding(Stage stage, unsigned index) {
  const bool slotInRange = index < kStorageImageSlots;
  if (!slotInRange) {
    util::unreachable();
  }

  const auto stageBase = getStageOffset(stage);
  return index + stageBase + kStorageImageOffset;
}
|
||||
|
||||
static unsigned getSamplerBinding(Stage stage, unsigned index) {
|
||||
if (index >= kSamplerSlots) {
|
||||
util::unreachable();
|
||||
|
|
|
|||
|
|
@ -371,6 +371,8 @@ amdgpu::shader::convert(RemoteMemory memory, Stage stage, std::uint64_t entry,
|
|||
builder.createCapability(spv::Capability::UniformAndStorageBuffer8BitAccess);
|
||||
builder.createCapability(spv::Capability::UniformAndStorageBuffer16BitAccess);
|
||||
builder.createCapability(spv::Capability::Int64);
|
||||
builder.createCapability(spv::Capability::StorageImageWriteWithoutFormat);
|
||||
builder.createCapability(spv::Capability::StorageImageReadWithoutFormat);
|
||||
builder.setMemoryModel(spv::AddressingModel::Logical,
|
||||
spv::MemoryModel::GLSL450);
|
||||
|
||||
|
|
@ -410,6 +412,7 @@ amdgpu::shader::convert(RemoteMemory memory, Stage stage, std::uint64_t entry,
|
|||
|
||||
std::size_t samplerCount = 0;
|
||||
std::size_t imageCount = 0;
|
||||
std::size_t storageImageCount = 0;
|
||||
std::size_t bufferCount = 0;
|
||||
|
||||
for (auto &uniform : ctxt.getUniforms()) {
|
||||
|
|
@ -427,6 +430,11 @@ amdgpu::shader::convert(RemoteMemory memory, Stage stage, std::uint64_t entry,
|
|||
newUniform.binding =
|
||||
UniformBindings::getSamplerBinding(stage, samplerCount++);
|
||||
break;
|
||||
case TypeId::StorageImage2D:
|
||||
newUniform.kind = Shader::UniformKind::StorageImage;
|
||||
newUniform.binding =
|
||||
UniformBindings::getStorageImageBinding(stage, storageImageCount++);
|
||||
break;
|
||||
case TypeId::Image2D:
|
||||
newUniform.kind = Shader::UniformKind::Image;
|
||||
newUniform.binding =
|
||||
|
|
@ -478,6 +486,14 @@ amdgpu::shader::convert(RemoteMemory memory, Stage stage, std::uint64_t entry,
|
|||
{{dimX, dimY, dimZ}});
|
||||
}
|
||||
|
||||
result.spirv = ctxt.getBuilder().build(SPV_VERSION, 0);
|
||||
// auto maxId = ctxt.getBuilder().getIdGenerator()->bounds;
|
||||
// for (std::size_t i = 1; i < maxId; ++i) {
|
||||
// spirv::Id id;
|
||||
// id.id = i;
|
||||
// if (builder.isIdDefined(id) && !builder.isIdUsed(id)) {
|
||||
// std::printf("ssa variable %%%zu defined, but not used\n", i);
|
||||
// }
|
||||
// }
|
||||
result.spirv = builder.build(SPV_VERSION, 0);
|
||||
return result;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -136,6 +136,10 @@ spirv::Type ConverterContext::getType(TypeId id) {
|
|||
return ((type = getBuilder().createTypeImage(getFloat32Type(),
|
||||
spv::Dim::Dim2D, 0, 0, 0, 1,
|
||||
spv::ImageFormat::Unknown)));
|
||||
case TypeId::StorageImage2D:
|
||||
return ((type = getBuilder().createTypeImage(getFloat32Type(),
|
||||
spv::Dim::Dim2D, 0, 0, 0, 2,
|
||||
spv::ImageFormat::Unknown)));
|
||||
case TypeId::SampledImage2D:
|
||||
return ((type = getBuilder().createTypeSampledImage(getImage2DType())));
|
||||
|
||||
|
|
|
|||
|
|
@ -81,7 +81,7 @@ std::uint32_t sizeOfFormat(SurfaceFormat format) {
|
|||
case kSurfaceFormat32_32_32_32:
|
||||
return 128;
|
||||
default:
|
||||
util::unreachable();
|
||||
util::unreachable("unsupported format %u", format);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -425,21 +425,79 @@ void convertToFormat(RegisterId sourceRegister, int count, Fragment &fragment,
|
|||
uniformPointerType, uniform->variable,
|
||||
{{fragment.context->getUInt32(0), channelOffset}});
|
||||
|
||||
spirv::Value channelValue;
|
||||
|
||||
switch (channelType) {
|
||||
case kTextureChannelTypeFloat:
|
||||
case kTextureChannelTypeSInt:
|
||||
case kTextureChannelTypeUInt:
|
||||
fragment.builder.createStore(
|
||||
uniformPointerValue,
|
||||
case kTextureChannelTypeUNorm: {
|
||||
channelValue =
|
||||
fragment
|
||||
.getOperand(RegisterId::Raw(sourceRegister + channel),
|
||||
storeType)
|
||||
.value);
|
||||
TypeId::Float32)
|
||||
.value;
|
||||
|
||||
auto maxValue =
|
||||
(static_cast<std::uint64_t>(1) << (channelSize * 8)) - 1;
|
||||
|
||||
channelValue =
|
||||
builder.createFMul(fragment.context->getFloat32Type(),
|
||||
spirv::cast<spirv::FloatValue>(channelValue),
|
||||
fragment.context->getFloat32(maxValue));
|
||||
|
||||
channelValue = builder.createConvertFToU(
|
||||
fragment.context->getType(TypeId::UInt32), channelValue);
|
||||
|
||||
if (storeType != TypeId::UInt32) {
|
||||
channelValue = builder.createUConvert(
|
||||
fragment.context->getType(storeType),
|
||||
spirv::cast<spirv::UIntValue>(channelValue));
|
||||
}
|
||||
break;
|
||||
}
|
||||
case kTextureChannelTypeFloat:
|
||||
channelValue =
|
||||
fragment
|
||||
.getOperand(RegisterId::Raw(sourceRegister + channel),
|
||||
TypeId::Float32)
|
||||
.value;
|
||||
|
||||
if (storeType != TypeId::Float32) {
|
||||
channelValue = fragment.builder.createFConvert(
|
||||
fragment.context->getType(storeType), channelValue);
|
||||
}
|
||||
break;
|
||||
|
||||
case kTextureChannelTypeSInt:
|
||||
channelValue =
|
||||
fragment
|
||||
.getOperand(RegisterId::Raw(sourceRegister + channel),
|
||||
TypeId::SInt32)
|
||||
.value;
|
||||
|
||||
if (storeType != TypeId::SInt32) {
|
||||
channelValue = fragment.builder.createSConvert(
|
||||
fragment.context->getType(storeType),
|
||||
spirv::cast<spirv::SIntValue>(channelValue));
|
||||
}
|
||||
break;
|
||||
case kTextureChannelTypeUInt:
|
||||
channelValue =
|
||||
fragment
|
||||
.getOperand(RegisterId::Raw(sourceRegister + channel),
|
||||
TypeId::UInt32)
|
||||
.value;
|
||||
|
||||
if (storeType != TypeId::UInt32) {
|
||||
channelValue = fragment.builder.createUConvert(
|
||||
fragment.context->getType(storeType),
|
||||
spirv::cast<spirv::UIntValue>(channelValue));
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
util::unreachable("unimplemented channel type %u", channelType);
|
||||
}
|
||||
|
||||
fragment.builder.createStore(uniformPointerValue, channelValue);
|
||||
}
|
||||
|
||||
for (; channel < count; ++channel) {
|
||||
|
|
@ -1163,16 +1221,42 @@ void convertVop2(Fragment &fragment, Vop2 inst) {
|
|||
}
|
||||
void convertSop2(Fragment &fragment, Sop2 inst) {
|
||||
fragment.registers->pc += Sop2::kMinInstSize * sizeof(std::uint32_t);
|
||||
auto &builder = fragment.builder;
|
||||
auto context = fragment.context;
|
||||
auto sCarry = [&](spirv::SIntValue a, spirv::SIntValue b,
|
||||
spirv::SIntValue result) {
|
||||
auto boolT = context->getBoolType();
|
||||
auto uint32T = context->getUInt32Type();
|
||||
auto s0 = context->getSInt32(0);
|
||||
auto u1 = context->getUInt32(1);
|
||||
auto u0 = context->getUInt32(0);
|
||||
auto aLtZero = builder.createSelect(
|
||||
uint32T, builder.createSLessThan(boolT, a, s0), u1, u0);
|
||||
auto bLtZero = builder.createSelect(
|
||||
uint32T, builder.createSLessThan(boolT, b, s0), u1, u0);
|
||||
auto resultLtZero = builder.createSelect(
|
||||
uint32T, builder.createSLessThan(boolT, result, s0), u1, u0);
|
||||
|
||||
auto argsSignEq = builder.createIEqual(boolT, aLtZero, bLtZero);
|
||||
auto resSignNe = builder.createINotEqual(boolT, resultLtZero, aLtZero);
|
||||
return Value{boolT, builder.createLogicalAnd(boolT, argsSignEq, resSignNe)};
|
||||
};
|
||||
|
||||
switch (inst.op) {
|
||||
case Sop2::Op::S_ADD_U32: {
|
||||
auto src0 = spirv::cast<spirv::UIntValue>(
|
||||
fragment.getScalarOperand(inst.ssrc0, TypeId::UInt32).value);
|
||||
auto src1 = spirv::cast<spirv::UIntValue>(
|
||||
fragment.getScalarOperand(inst.ssrc1, TypeId::UInt32).value);
|
||||
auto resultT = fragment.context->getUInt32Type();
|
||||
auto result = fragment.builder.createIAdd(resultT, src0, src1);
|
||||
fragment.setScc({resultT, result});
|
||||
fragment.setScalarOperand(inst.sdst, {resultT, result});
|
||||
auto src0 = fragment.getScalarOperand(inst.ssrc0, TypeId::UInt32).value;
|
||||
auto src1 = fragment.getScalarOperand(inst.ssrc1, TypeId::UInt32).value;
|
||||
auto uintT = fragment.context->getType(TypeId::UInt32);
|
||||
auto resultStruct =
|
||||
fragment.context->getStructType(std::array{uintT, uintT});
|
||||
auto result = fragment.builder.createIAddCarry(resultStruct, src0, src1);
|
||||
fragment.setScalarOperand(
|
||||
inst.sdst,
|
||||
{uintT, fragment.builder.createCompositeExtract(
|
||||
uintT, result, {{static_cast<std::uint32_t>(0)}})});
|
||||
fragment.setScc(
|
||||
{uintT, fragment.builder.createCompositeExtract(
|
||||
uintT, result, {{static_cast<std::uint32_t>(1)}})});
|
||||
break;
|
||||
}
|
||||
case Sop2::Op::S_ADD_I32: {
|
||||
|
|
@ -1182,7 +1266,35 @@ void convertSop2(Fragment &fragment, Sop2 inst) {
|
|||
fragment.getScalarOperand(inst.ssrc1, TypeId::SInt32).value);
|
||||
auto resultT = fragment.context->getSint32Type();
|
||||
auto result = fragment.builder.createIAdd(resultT, src0, src1);
|
||||
fragment.setScc({resultT, result});
|
||||
fragment.setScc(sCarry(src0, src1, result));
|
||||
fragment.setScalarOperand(inst.sdst, {resultT, result});
|
||||
break;
|
||||
}
|
||||
|
||||
case Sop2::Op::S_SUB_U32: {
|
||||
auto src0 = fragment.getScalarOperand(inst.ssrc0, TypeId::UInt32).value;
|
||||
auto src1 = fragment.getScalarOperand(inst.ssrc1, TypeId::UInt32).value;
|
||||
auto uintT = fragment.context->getType(TypeId::UInt32);
|
||||
auto resultStruct =
|
||||
fragment.context->getStructType(std::array{uintT, uintT});
|
||||
auto result = fragment.builder.createISubBorrow(resultStruct, src0, src1);
|
||||
fragment.setScalarOperand(
|
||||
inst.sdst,
|
||||
{uintT, fragment.builder.createCompositeExtract(
|
||||
uintT, result, {{static_cast<std::uint32_t>(0)}})});
|
||||
fragment.setScc(
|
||||
{uintT, fragment.builder.createCompositeExtract(
|
||||
uintT, result, {{static_cast<std::uint32_t>(1)}})});
|
||||
break;
|
||||
}
|
||||
case Sop2::Op::S_SUB_I32: {
  // sdst = ssrc0 - ssrc1 (signed); SCC receives the signed-overflow flag.
  auto src0 = spirv::cast<spirv::SIntValue>(
      fragment.getScalarOperand(inst.ssrc0, TypeId::SInt32).value);
  auto src1 = spirv::cast<spirv::SIntValue>(
      fragment.getScalarOperand(inst.ssrc1, TypeId::SInt32).value);
  auto resultT = fragment.context->getSint32Type();
  auto result = fragment.builder.createISub(resultT, src0, src1);

  // A signed subtraction overflows when the operands have different signs
  // and the sign of the result differs from the sign of the minuend. The
  // addition predicate (operand signs equal) must not be reused here: it
  // never fires for e.g. INT_MIN - 1 and fires spuriously for same-sign
  // operands whose difference changes sign.
  auto boolT = fragment.context->getBoolType();
  auto uint32T = fragment.context->getUInt32Type();
  auto s0 = fragment.context->getSInt32(0);
  auto u1 = fragment.context->getUInt32(1);
  auto u0 = fragment.context->getUInt32(0);

  spirv::SIntValue difference = result;
  auto src0LtZero = fragment.builder.createSelect(
      uint32T, fragment.builder.createSLessThan(boolT, src0, s0), u1, u0);
  auto src1LtZero = fragment.builder.createSelect(
      uint32T, fragment.builder.createSLessThan(boolT, src1, s0), u1, u0);
  auto resultLtZero = fragment.builder.createSelect(
      uint32T, fragment.builder.createSLessThan(boolT, difference, s0), u1,
      u0);

  auto argsSignNe =
      fragment.builder.createINotEqual(boolT, src0LtZero, src1LtZero);
  auto resSignNe =
      fragment.builder.createINotEqual(boolT, resultLtZero, src0LtZero);

  fragment.setScc(Value{
      boolT, fragment.builder.createLogicalAnd(boolT, argsSignNe, resSignNe)});
  fragment.setScalarOperand(inst.sdst, {resultT, result});
  break;
}
|
||||
|
|
@ -1685,6 +1797,12 @@ void convertVop3(Fragment &fragment, Vop3 inst) {
|
|||
return result;
|
||||
};
|
||||
|
||||
auto roundEven = [&](spirv::Type type, spirv::Value value) {
|
||||
auto glslStd450 = fragment.context->getGlslStd450();
|
||||
return Value{type, fragment.builder.createExtInst(
|
||||
type, glslStd450, GLSLstd450RoundEven, {{value}})};
|
||||
};
|
||||
|
||||
auto cmpOp = [&](TypeId type, CmpKind kind, CmpFlags flags = CmpFlags::None) {
|
||||
auto src0 = fragment.getScalarOperand(inst.src0, type).value;
|
||||
auto src1 = fragment.getScalarOperand(inst.src1, type).value;
|
||||
|
|
@ -2456,7 +2574,7 @@ void convertVop3(Fragment &fragment, Vop3 inst) {
|
|||
floatT, float1, spirv::cast<spirv::FloatValue>(src.value));
|
||||
auto result = applyClamp(applyOmod({floatT, resultValue}));
|
||||
|
||||
fragment.setVectorOperand(inst.vdst, result);
|
||||
fragment.setVectorOperand(inst.vdst, roundEven(result.type, result.value));
|
||||
break;
|
||||
}
|
||||
|
||||
|
|
@ -2475,7 +2593,8 @@ void convertVop3(Fragment &fragment, Vop3 inst) {
|
|||
inst.sdst,
|
||||
{uintT, fragment.builder.createCompositeExtract(
|
||||
uintT, result, std::array{static_cast<std::uint32_t>(1)})});
|
||||
// TODO: update sdst + 1
|
||||
fragment.setScalarOperand(inst.sdst + 1,
|
||||
{uintT, fragment.context->getUInt32(0)});
|
||||
break;
|
||||
}
|
||||
|
||||
|
|
@ -2524,6 +2643,18 @@ void convertVop3(Fragment &fragment, Vop3 inst) {
|
|||
fragment.setVectorOperand(inst.vdst, result);
|
||||
break;
|
||||
}
|
||||
case Vop3::Op::V3_MUL_LO_U32: {
|
||||
auto resultT = fragment.context->getUInt32Type();
|
||||
auto src0 = getSrc(0, TypeId::UInt32);
|
||||
auto src1 = getSrc(1, TypeId::UInt32);
|
||||
auto resultValue = fragment.builder.createIMul(
|
||||
resultT, spirv::cast<spirv::UIntValue>(src0.value),
|
||||
spirv::cast<spirv::UIntValue>(src1.value));
|
||||
auto result = applyClamp(applyOmod({resultT, resultValue}));
|
||||
|
||||
fragment.setVectorOperand(inst.vdst, result);
|
||||
break;
|
||||
}
|
||||
case Vop3::Op::V3_MUL_LO_I32: {
|
||||
auto resultT = fragment.context->getSint32Type();
|
||||
auto src0 = getSrc(0, TypeId::SInt32);
|
||||
|
|
@ -3106,15 +3237,23 @@ void convertMubuf(Fragment &fragment, Mubuf inst) {
|
|||
static_cast<int>(Mubuf::Op::BUFFER_LOAD_FORMAT_X) + 1;
|
||||
|
||||
auto vbuffer = getVBuffer();
|
||||
auto address = getAddress(&vbuffer);
|
||||
if (vbuffer.dfmt != kSurfaceFormatInvalid) {
|
||||
auto address = getAddress(&vbuffer);
|
||||
|
||||
spirv::Value result[4];
|
||||
auto resultType = convertFromFormat(
|
||||
result, count, fragment, reinterpret_cast<std::uint32_t *>(&vbuffer),
|
||||
address, vbuffer.dfmt, vbuffer.nfmt);
|
||||
spirv::Value result[4];
|
||||
auto resultType = convertFromFormat(
|
||||
result, count, fragment, reinterpret_cast<std::uint32_t *>(&vbuffer),
|
||||
address, vbuffer.dfmt, vbuffer.nfmt);
|
||||
|
||||
for (std::uint32_t i = 0; i < count; ++i) {
|
||||
fragment.setVectorOperand(inst.vdata + i, {resultType, result[i]});
|
||||
for (std::uint32_t i = 0; i < count; ++i) {
|
||||
fragment.setVectorOperand(inst.vdata + i, {resultType, result[i]});
|
||||
}
|
||||
} else {
|
||||
auto floatT = fragment.context->getFloat32Type();
|
||||
auto zero = fragment.context->getFloat32(0);
|
||||
for (std::uint32_t i = 0; i < count; ++i) {
|
||||
fragment.setVectorOperand(inst.vdata + i, {floatT, zero});
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
|
@ -3128,11 +3267,13 @@ void convertMubuf(Fragment &fragment, Mubuf inst) {
|
|||
1;
|
||||
|
||||
auto vbuffer = getVBuffer();
|
||||
auto address = getAddress(&vbuffer);
|
||||
if (vbuffer.dfmt != kSurfaceFormatInvalid) {
|
||||
auto address = getAddress(&vbuffer);
|
||||
|
||||
convertToFormat(RegisterId::Vector(inst.vdata), count, fragment,
|
||||
reinterpret_cast<std::uint32_t *>(&vbuffer), address,
|
||||
vbuffer.dfmt, vbuffer.nfmt);
|
||||
convertToFormat(RegisterId::Vector(inst.vdata), count, fragment,
|
||||
reinterpret_cast<std::uint32_t *>(&vbuffer), address,
|
||||
vbuffer.dfmt, vbuffer.nfmt);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
|
|
@ -3196,7 +3337,6 @@ void convertMubuf(Fragment &fragment, Mubuf inst) {
|
|||
|
||||
auto vbuffer = getVBuffer();
|
||||
auto address = getAddress(&vbuffer);
|
||||
auto storeType = fragment.context->getType(TypeId::UInt32);
|
||||
auto uniform = fragment.context->getOrCreateStorageBuffer(
|
||||
reinterpret_cast<std::uint32_t *>(&vbuffer), TypeId::UInt32);
|
||||
uniform->accessOp |= AccessOp::Store;
|
||||
|
|
@ -3365,9 +3505,15 @@ void convertMtbuf(Fragment &fragment, Mtbuf inst) {
|
|||
auto indexMsb = builder.createIMul(
|
||||
uint32T, index_msb, fragment.context->getUInt32(vbuffer->stride));
|
||||
|
||||
auto indexLsb = builder.createIMul(
|
||||
uint32T, index_lsb,
|
||||
fragment.context->getUInt32(vbuffer->element_size));
|
||||
|
||||
address = builder.createIAdd(
|
||||
uint32T, address,
|
||||
builder.createIMul(uint32T, indexMsb, indexStride));
|
||||
|
||||
address = builder.createIAdd(uint32T, address, indexLsb);
|
||||
} else if (offset) {
|
||||
auto indexStride = fragment.context->getUInt32(vbuffer->index_stride);
|
||||
auto elementSize = fragment.context->getUInt32(vbuffer->element_size);
|
||||
|
|
@ -3527,9 +3673,15 @@ void convertMtbuf(Fragment &fragment, Mtbuf inst) {
|
|||
auto indexMsb = builder.createIMul(
|
||||
uint32T, index_msb, fragment.context->getUInt32(vbuffer->stride));
|
||||
|
||||
auto indexLsb = builder.createIMul(
|
||||
uint32T, index_lsb,
|
||||
fragment.context->getUInt32(vbuffer->element_size));
|
||||
|
||||
address = builder.createIAdd(
|
||||
uint32T, address,
|
||||
builder.createIMul(uint32T, indexMsb, indexStride));
|
||||
|
||||
address = builder.createIAdd(uint32T, address, indexLsb);
|
||||
} else if (offset) {
|
||||
auto indexStride = fragment.context->getUInt32(vbuffer->index_stride);
|
||||
auto elementSize = fragment.context->getUInt32(vbuffer->element_size);
|
||||
|
|
@ -3565,8 +3717,9 @@ void convertMimg(Fragment &fragment, Mimg inst) {
|
|||
fragment.registers->pc += Mimg::kMinInstSize * sizeof(std::uint32_t);
|
||||
switch (inst.op) {
|
||||
case Mimg::Op::IMAGE_GET_RESINFO: {
|
||||
auto image =
|
||||
fragment.createImage(RegisterId::Raw(inst.srsrc << 2), inst.r128);
|
||||
auto image = fragment.createImage(RegisterId::Raw(inst.srsrc << 2),
|
||||
inst.r128, true, // fixme, should be any
|
||||
AccessOp::None);
|
||||
spirv::Value values[4];
|
||||
auto uint32T = fragment.context->getUInt32Type();
|
||||
|
||||
|
|
@ -3600,8 +3753,8 @@ void convertMimg(Fragment &fragment, Mimg inst) {
|
|||
}
|
||||
|
||||
case Mimg::Op::IMAGE_SAMPLE_LZ: {
|
||||
auto image =
|
||||
fragment.createImage(RegisterId::Raw(inst.srsrc << 2), inst.r128);
|
||||
auto image = fragment.createImage(RegisterId::Raw(inst.srsrc << 2),
|
||||
inst.r128, true, AccessOp::Load);
|
||||
auto sampler = fragment.createSampler(RegisterId::Raw(inst.ssamp << 2));
|
||||
auto coord0 = fragment.getVectorOperand(inst.vaddr, TypeId::Float32).value;
|
||||
auto coord1 =
|
||||
|
|
@ -3633,8 +3786,8 @@ void convertMimg(Fragment &fragment, Mimg inst) {
|
|||
}
|
||||
|
||||
case Mimg::Op::IMAGE_SAMPLE: {
|
||||
auto image =
|
||||
fragment.createImage(RegisterId::Raw(inst.srsrc << 2), inst.r128);
|
||||
auto image = fragment.createImage(RegisterId::Raw(inst.srsrc << 2),
|
||||
inst.r128, true, AccessOp::Load);
|
||||
auto sampler = fragment.createSampler(RegisterId::Raw(inst.ssamp << 2));
|
||||
auto coord0 = fragment.getVectorOperand(inst.vaddr, TypeId::Float32).value;
|
||||
auto coord1 =
|
||||
|
|
@ -3664,6 +3817,66 @@ void convertMimg(Fragment &fragment, Mimg inst) {
|
|||
break;
|
||||
}
|
||||
|
||||
case Mimg::Op::IMAGE_STORE:
|
||||
case Mimg::Op::IMAGE_STORE_MIP: {
|
||||
auto image = fragment.createImage(RegisterId::Raw(inst.srsrc << 2),
|
||||
inst.r128, false, AccessOp::Store);
|
||||
auto coord0 = fragment.getVectorOperand(inst.vaddr, TypeId::UInt32).value;
|
||||
auto coord1 =
|
||||
fragment.getVectorOperand(inst.vaddr + 1, TypeId::UInt32).value;
|
||||
auto coord2 =
|
||||
fragment.getVectorOperand(inst.vaddr + 2, TypeId::UInt32).value;
|
||||
auto coords = fragment.builder.createCompositeConstruct(
|
||||
fragment.context->getUint32x3Type(),
|
||||
{{coord0, coord1, coord2}}); // TODO
|
||||
|
||||
auto float4T = fragment.context->getFloat32x4Type();
|
||||
spirv::Value values[4];
|
||||
|
||||
for (std::uint32_t dstOffset = 0, i = 0; i < 4; ++i) {
|
||||
if (inst.dmask & (1 << i)) {
|
||||
values[i] =
|
||||
fragment.getVectorOperand(inst.vdata + dstOffset++, TypeId::Float32)
|
||||
.value;
|
||||
} else {
|
||||
values[i] = fragment.context->getFloat32(0);
|
||||
}
|
||||
}
|
||||
|
||||
auto value = fragment.builder.createCompositeConstruct(float4T, values);
|
||||
fragment.builder.createImageWrite(image, coords, value);
|
||||
break;
|
||||
}
|
||||
|
||||
case Mimg::Op::IMAGE_LOAD:
|
||||
case Mimg::Op::IMAGE_LOAD_MIP: {
|
||||
auto image = fragment.createImage(RegisterId::Raw(inst.srsrc << 2),
|
||||
inst.r128, false, AccessOp::Load);
|
||||
auto coord0 = fragment.getVectorOperand(inst.vaddr, TypeId::UInt32).value;
|
||||
auto coord1 =
|
||||
fragment.getVectorOperand(inst.vaddr + 1, TypeId::UInt32).value;
|
||||
auto coord2 =
|
||||
fragment.getVectorOperand(inst.vaddr + 2, TypeId::UInt32).value;
|
||||
auto coords = fragment.builder.createCompositeConstruct(
|
||||
fragment.context->getUint32x3Type(),
|
||||
{{coord0, coord1, coord2}}); // TODO
|
||||
|
||||
auto float4T = fragment.context->getFloat32x4Type();
|
||||
auto floatT = fragment.context->getFloat32Type();
|
||||
|
||||
auto value = fragment.builder.createImageRead(float4T, image, coords);
|
||||
|
||||
for (std::uint32_t dstOffset = 0, i = 0; i < 4; ++i) {
|
||||
if (inst.dmask & (1 << i)) {
|
||||
fragment.setVectorOperand(
|
||||
inst.vdata + dstOffset++,
|
||||
{floatT,
|
||||
fragment.builder.createCompositeExtract(floatT, value, {{i}})});
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case Mimg::Op::IMAGE_GET_LOD: {
|
||||
auto intT = fragment.context->getUInt32Type();
|
||||
for (std::uint32_t dstOffset = 0, i = 0; i < 4; ++i) {
|
||||
|
|
@ -3799,6 +4012,14 @@ void convertExp(Fragment &fragment, Exp inst) {
|
|||
|
||||
void convertVop1(Fragment &fragment, Vop1 inst) {
|
||||
fragment.registers->pc += Vop1::kMinInstSize * sizeof(std::uint32_t);
|
||||
auto roundEven = [&](spirv::Type type, spirv::Value value) {
|
||||
// auto glslStd450 = fragment.context->getGlslStd450();
|
||||
// return Value{type, fragment.builder.createExtInst(
|
||||
// type, glslStd450, GLSLstd450RoundEven,
|
||||
// {{value}})};
|
||||
return Value{type, value};
|
||||
};
|
||||
|
||||
switch (inst.op) {
|
||||
case Vop1::Op::V_MOV_B32:
|
||||
fragment.setVectorOperand(
|
||||
|
|
@ -3806,6 +4027,22 @@ void convertVop1(Fragment &fragment, Vop1 inst) {
|
|||
OperandGetFlags::PreserveType));
|
||||
break;
|
||||
|
||||
case Vop1::Op::V_RCP_IFLAG_F32: {
|
||||
auto src = spirv::cast<spirv::FloatValue>(
|
||||
fragment.getScalarOperand(inst.src0, TypeId::Float32).value);
|
||||
auto floatT = fragment.context->getFloat32Type();
|
||||
|
||||
auto isNotZero = fragment.builder.createFOrdNotEqual(
|
||||
fragment.context->getBoolType(), src, fragment.context->getFloat32(0));
|
||||
|
||||
src = fragment.builder.createSelect(
|
||||
floatT, isNotZero, src, fragment.context->getFloat32(0.0000001));
|
||||
auto float1 = fragment.context->getFloat32(1);
|
||||
auto result = fragment.builder.createFDiv(floatT, float1, src);
|
||||
|
||||
fragment.setVectorOperand(inst.vdst, roundEven(floatT, result));
|
||||
break;
|
||||
}
|
||||
case Vop1::Op::V_RCP_F32: {
|
||||
auto src = spirv::cast<spirv::FloatValue>(
|
||||
fragment.getScalarOperand(inst.src0, TypeId::Float32).value);
|
||||
|
|
@ -3813,7 +4050,7 @@ void convertVop1(Fragment &fragment, Vop1 inst) {
|
|||
auto float1 = fragment.context->getFloat32(1);
|
||||
auto result = fragment.builder.createFDiv(floatT, float1, src);
|
||||
|
||||
fragment.setVectorOperand(inst.vdst, {floatT, result});
|
||||
fragment.setVectorOperand(inst.vdst, roundEven(floatT, result));
|
||||
break;
|
||||
}
|
||||
|
||||
|
|
@ -3870,7 +4107,7 @@ void convertVop1(Fragment &fragment, Vop1 inst) {
|
|||
auto result = fragment.builder.createExtInst(floatT, glslStd450,
|
||||
GLSLstd450Exp2, {{src}});
|
||||
|
||||
fragment.setVectorOperand(inst.vdst, {floatT, result});
|
||||
fragment.setVectorOperand(inst.vdst, roundEven(floatT, result));
|
||||
break;
|
||||
}
|
||||
|
||||
|
|
@ -3946,7 +4183,7 @@ void convertVop1(Fragment &fragment, Vop1 inst) {
|
|||
auto result = fragment.builder.createExtInst(floatT, glslStd450,
|
||||
GLSLstd450Sin, {{src}});
|
||||
|
||||
fragment.setVectorOperand(inst.vdst, {floatT, result});
|
||||
fragment.setVectorOperand(inst.vdst, roundEven(floatT, result));
|
||||
break;
|
||||
}
|
||||
case Vop1::Op::V_COS_F32: {
|
||||
|
|
@ -3960,7 +4197,7 @@ void convertVop1(Fragment &fragment, Vop1 inst) {
|
|||
auto result = fragment.builder.createExtInst(floatT, glslStd450,
|
||||
GLSLstd450Cos, {{src}});
|
||||
|
||||
fragment.setVectorOperand(inst.vdst, {floatT, result});
|
||||
fragment.setVectorOperand(inst.vdst, roundEven(floatT, result));
|
||||
break;
|
||||
}
|
||||
|
||||
|
|
@ -5253,7 +5490,8 @@ spirv::SamplerValue Fragment::createSampler(RegisterId base) {
|
|||
}
|
||||
}
|
||||
|
||||
spirv::ImageValue Fragment::createImage(RegisterId base, bool r128) {
|
||||
spirv::ImageValue Fragment::createImage(RegisterId base, bool r128,
|
||||
bool sampled, AccessOp access) {
|
||||
auto tBuffer0 = getOperand(RegisterId::Raw(base + 0), TypeId::UInt32);
|
||||
auto tBuffer1 = getOperand(RegisterId::Raw(base + 1), TypeId::UInt32);
|
||||
auto tBuffer2 = getOperand(RegisterId::Raw(base + 2), TypeId::UInt32);
|
||||
|
|
@ -5269,6 +5507,10 @@ spirv::ImageValue Fragment::createImage(RegisterId base, bool r128) {
|
|||
util::unreachable();
|
||||
}
|
||||
|
||||
auto imageTypeId = sampled ? TypeId::Image2D : TypeId::StorageImage2D;
|
||||
auto imageType =
|
||||
sampled ? context->getImage2DType() : context->getStorageImage2DType();
|
||||
|
||||
if (r128) {
|
||||
std::uint32_t sbuffer[] = {
|
||||
*optTBuffer0Value,
|
||||
|
|
@ -5278,8 +5520,9 @@ spirv::ImageValue Fragment::createImage(RegisterId base, bool r128) {
|
|||
};
|
||||
|
||||
auto uniform = context->getOrCreateUniformConstant(
|
||||
sbuffer, std::size(sbuffer), TypeId::Image2D);
|
||||
return builder.createLoad(context->getImage2DType(), uniform->variable);
|
||||
sbuffer, std::size(sbuffer), imageTypeId);
|
||||
uniform->accessOp |= access;
|
||||
return builder.createLoad(imageType, uniform->variable);
|
||||
}
|
||||
|
||||
auto tBuffer4 = getOperand(RegisterId::Raw(base + 4), TypeId::UInt32);
|
||||
|
|
@ -5304,8 +5547,9 @@ spirv::ImageValue Fragment::createImage(RegisterId base, bool r128) {
|
|||
};
|
||||
|
||||
auto uniform = context->getOrCreateUniformConstant(
|
||||
sbuffer, std::size(sbuffer), TypeId::Image2D);
|
||||
return builder.createLoad(context->getImage2DType(), uniform->variable);
|
||||
sbuffer, std::size(sbuffer), imageTypeId);
|
||||
uniform->accessOp |= access;
|
||||
return builder.createLoad(imageType, uniform->variable);
|
||||
}
|
||||
|
||||
Value Fragment::createCompositeExtract(Value composite, std::uint32_t member) {
|
||||
|
|
@ -5697,15 +5941,15 @@ void amdgpu::shader::Fragment::convert(std::uint64_t size) {
|
|||
|
||||
while (ptr < endptr) {
|
||||
Instruction inst(ptr);
|
||||
// auto startPoint = builder.bodyRegion.getCurrentPosition();
|
||||
auto startPoint = builder.bodyRegion.getCurrentPosition();
|
||||
|
||||
// std::printf("===============\n");
|
||||
// inst.dump();
|
||||
// std::printf("\n");
|
||||
std::printf("===============\n");
|
||||
inst.dump();
|
||||
std::printf("\n");
|
||||
convertInstruction(*this, inst);
|
||||
|
||||
// std::printf("-------------->\n");
|
||||
// spirv::dump(builder.bodyRegion.getCurrentPosition() - startPoint);
|
||||
std::printf("-------------->\n");
|
||||
spirv::dump(builder.bodyRegion.getCurrentPosition() - startPoint);
|
||||
|
||||
ptr += inst.size();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,6 +1,5 @@
|
|||
#include "TypeId.hpp"
|
||||
#include "util/unreachable.hpp"
|
||||
#include <cstdint>
|
||||
|
||||
amdgpu::shader::TypeId amdgpu::shader::TypeId::getBaseType() const {
|
||||
switch (raw) {
|
||||
|
|
@ -19,6 +18,7 @@ amdgpu::shader::TypeId amdgpu::shader::TypeId::getBaseType() const {
|
|||
case TypeId::Float64:
|
||||
case TypeId::Sampler:
|
||||
case TypeId::Image2D:
|
||||
case TypeId::StorageImage2D:
|
||||
case TypeId::SampledImage2D:
|
||||
return raw;
|
||||
|
||||
|
|
@ -44,6 +44,7 @@ std::size_t amdgpu::shader::TypeId::getSize() const {
|
|||
switch (raw) {
|
||||
case TypeId::Void:
|
||||
case TypeId::Sampler:
|
||||
case TypeId::StorageImage2D:
|
||||
case TypeId::Image2D:
|
||||
case TypeId::SampledImage2D:
|
||||
return 0;
|
||||
|
|
@ -124,6 +125,7 @@ std::size_t amdgpu::shader::TypeId::getElementsCount() const {
|
|||
case TypeId::Void:
|
||||
case TypeId::Sampler:
|
||||
case TypeId::Image2D:
|
||||
case TypeId::StorageImage2D:
|
||||
case TypeId::SampledImage2D:
|
||||
return 0;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
#include "cf.hpp"
|
||||
#include <cassert>
|
||||
#include <fstream>
|
||||
#include <cstdlib>
|
||||
#include <unordered_set>
|
||||
|
||||
void cf::BasicBlock::split(BasicBlock *target) {
|
||||
|
|
|
|||
|
|
@ -1,8 +1,5 @@
|
|||
#include "scf.hpp"
|
||||
#include "cf.hpp"
|
||||
#include <cassert>
|
||||
#include <fstream>
|
||||
#include <unordered_set>
|
||||
#include <utility>
|
||||
|
||||
void scf::Block::eraseFrom(Node *endBefore) {
|
||||
|
|
|
|||
Loading…
Reference in a new issue