mirror of
https://github.com/RPCSX/rpcsx.git
synced 2026-01-20 15:40:30 +01:00
rpcsx-gpu: fix flip image acquire
fix cmpx; fix cs init ordering; implement s_mulk_i32, s_abs_i32, s_cmovk_i32, s_cmov_b32 and s_cmov_b64; fix s_mul_i32; fix s_cbranch_* for cs
This commit is contained in:
parent
23226c966a
commit
113abf20e5
|
|
@ -1299,21 +1299,21 @@ Cache::ImageView Cache::Tag::getImageView(const ImageKey &key, Access access) {
|
|||
|
||||
void Cache::Tag::readMemory(void *target, std::uint64_t address,
|
||||
std::uint64_t size) {
|
||||
mParent->flush(*mScheduler, address, size);
|
||||
// mParent->flush(*mScheduler, address, size);
|
||||
auto memoryPtr = RemoteMemory{mParent->mVmIm}.getPointer(address);
|
||||
std::memcpy(target, memoryPtr, size);
|
||||
}
|
||||
|
||||
void Cache::Tag::writeMemory(const void *source, std::uint64_t address,
|
||||
std::uint64_t size) {
|
||||
mParent->flush(*mScheduler, address, size);
|
||||
// mParent->invalidate(*mScheduler, address, size);
|
||||
auto memoryPtr = RemoteMemory{mParent->mVmIm}.getPointer(address);
|
||||
std::memcpy(memoryPtr, source, size);
|
||||
}
|
||||
|
||||
int Cache::Tag::compareMemory(const void *source, std::uint64_t address,
|
||||
std::uint64_t size) {
|
||||
mParent->flush(*mScheduler, address, size);
|
||||
// mParent->flush(*mScheduler, address, size);
|
||||
auto memoryPtr = RemoteMemory{mParent->mVmIm}.getPointer(address);
|
||||
return std::memcmp(memoryPtr, source, size);
|
||||
}
|
||||
|
|
@ -1348,14 +1348,18 @@ void Cache::Tag::release() {
|
|||
return;
|
||||
}
|
||||
|
||||
std::vector<std::shared_ptr<Entry>> tmpResources;
|
||||
while (!mStorage->mAcquiredResources.empty()) {
|
||||
auto resource = std::move(mStorage->mAcquiredResources.back());
|
||||
mStorage->mAcquiredResources.pop_back();
|
||||
resource->flush(*this, *mScheduler, 0, ~static_cast<std::uint64_t>(0));
|
||||
tmpResources.push_back(std::move(resource));
|
||||
}
|
||||
|
||||
mScheduler->submit();
|
||||
mScheduler->wait();
|
||||
if (!tmpResources.empty()) {
|
||||
mScheduler->submit();
|
||||
mScheduler->wait();
|
||||
}
|
||||
|
||||
mStorage->clear();
|
||||
auto storageIndex = mStorage - mParent->mTagStorages;
|
||||
|
|
@ -1865,6 +1869,10 @@ Cache::Cache(Device *device, int vmId) : mDevice(device), mVmIm(vmId) {
|
|||
}
|
||||
|
||||
Cache::~Cache() {
|
||||
for (auto &samp : mSamplers) {
|
||||
vkDestroySampler(vk::context->device, samp.second, vk::context->allocator);
|
||||
}
|
||||
|
||||
vkDestroyDescriptorPool(vk::context->device, mDescriptorPool,
|
||||
vk::context->allocator);
|
||||
|
||||
|
|
|
|||
|
|
@ -242,8 +242,7 @@ transitionImageLayout(VkCommandBuffer commandBuffer, VkImage image,
|
|||
}
|
||||
|
||||
bool Device::flip(std::int64_t pid, int bufferIndex, std::uint64_t arg,
|
||||
VkCommandBuffer commandBuffer, VkImage swapchainImage,
|
||||
VkImageView swapchainImageView, VkFence fence) {
|
||||
VkImage swapchainImage, VkImageView swapchainImageView) {
|
||||
auto &pipe = graphicsPipes[0];
|
||||
auto &scheduler = pipe.scheduler;
|
||||
auto &process = processInfo[pid];
|
||||
|
|
@ -292,15 +291,11 @@ bool Device::flip(std::int64_t pid, int bufferIndex, std::uint64_t arg,
|
|||
}
|
||||
|
||||
// std::printf("displaying buffer %lx\n", buffer.address);
|
||||
VkCommandBufferBeginInfo beginInfo{};
|
||||
beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
|
||||
beginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
|
||||
|
||||
vkBeginCommandBuffer(commandBuffer, &beginInfo);
|
||||
|
||||
auto cacheTag = getCacheTag(process.vmId, scheduler);
|
||||
auto &sched = cacheTag.getScheduler();
|
||||
|
||||
transitionImageLayout(commandBuffer, swapchainImage,
|
||||
transitionImageLayout(sched.getCommandBuffer(), swapchainImage,
|
||||
VK_IMAGE_LAYOUT_UNDEFINED,
|
||||
VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
|
||||
{
|
||||
|
|
@ -310,11 +305,11 @@ bool Device::flip(std::int64_t pid, int bufferIndex, std::uint64_t arg,
|
|||
});
|
||||
|
||||
amdgpu::flip(
|
||||
cacheTag, commandBuffer, vk::context->swapchainExtent, buffer.address,
|
||||
cacheTag, vk::context->swapchainExtent, buffer.address,
|
||||
swapchainImageView, {bufferAttr.width, bufferAttr.height}, flipType,
|
||||
getDefaultTileModes()[bufferAttr.tilingMode == 1 ? 10 : 8], dfmt, nfmt);
|
||||
|
||||
transitionImageLayout(commandBuffer, swapchainImage,
|
||||
transitionImageLayout(sched.getCommandBuffer(), swapchainImage,
|
||||
VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
|
||||
VK_IMAGE_LAYOUT_PRESENT_SRC_KHR,
|
||||
{
|
||||
|
|
@ -323,10 +318,25 @@ bool Device::flip(std::int64_t pid, int bufferIndex, std::uint64_t arg,
|
|||
.layerCount = 1,
|
||||
});
|
||||
|
||||
sched.submit();
|
||||
|
||||
auto submitCompleteTask = scheduler.createExternalSubmit();
|
||||
|
||||
{
|
||||
vkEndCommandBuffer(commandBuffer);
|
||||
VkSemaphoreSubmitInfo waitSemSubmitInfos[] = {
|
||||
{
|
||||
.sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
|
||||
.semaphore = vk::context->presentCompleteSemaphore,
|
||||
.value = 1,
|
||||
.stageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
|
||||
},
|
||||
{
|
||||
.sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
|
||||
.semaphore = scheduler.getSemaphoreHandle(),
|
||||
.value = submitCompleteTask - 1,
|
||||
.stageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
|
||||
},
|
||||
};
|
||||
|
||||
VkSemaphoreSubmitInfo signalSemSubmitInfos[] = {
|
||||
{
|
||||
|
|
@ -343,38 +353,15 @@ bool Device::flip(std::int64_t pid, int bufferIndex, std::uint64_t arg,
|
|||
},
|
||||
};
|
||||
|
||||
VkSemaphoreSubmitInfo waitSemSubmitInfos[] = {
|
||||
{
|
||||
.sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
|
||||
.semaphore = vk::context->presentCompleteSemaphore,
|
||||
.value = 1,
|
||||
.stageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
|
||||
},
|
||||
{
|
||||
.sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
|
||||
.semaphore = scheduler.getSemaphoreHandle(),
|
||||
.value = submitCompleteTask - 1,
|
||||
.stageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
|
||||
},
|
||||
};
|
||||
|
||||
VkCommandBufferSubmitInfo cmdBufferSubmitInfo{
|
||||
.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO,
|
||||
.commandBuffer = commandBuffer,
|
||||
};
|
||||
|
||||
VkSubmitInfo2 submitInfo{
|
||||
.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO_2,
|
||||
.waitSemaphoreInfoCount = 1,
|
||||
.waitSemaphoreInfoCount = 2,
|
||||
.pWaitSemaphoreInfos = waitSemSubmitInfos,
|
||||
.commandBufferInfoCount = 1,
|
||||
.pCommandBufferInfos = &cmdBufferSubmitInfo,
|
||||
.signalSemaphoreInfoCount = 2,
|
||||
.pSignalSemaphoreInfos = signalSemSubmitInfos,
|
||||
};
|
||||
|
||||
vkQueueSubmit2(vk::context->presentQueue, 1, &submitInfo, fence);
|
||||
vkQueueWaitIdle(vk::context->presentQueue);
|
||||
vkQueueSubmit2(vk::context->presentQueue, 1, &submitInfo, VK_NULL_HANDLE);
|
||||
}
|
||||
|
||||
scheduler.then([=, this, cacheTag = std::move(cacheTag)] {
|
||||
|
|
|
|||
|
|
@ -86,8 +86,7 @@ struct Device {
|
|||
std::uint64_t size);
|
||||
bool processPipes();
|
||||
bool flip(std::int64_t pid, int bufferIndex, std::uint64_t arg,
|
||||
VkCommandBuffer commandBuffer, VkImage swapchainImage,
|
||||
VkImageView swapchainImageView, VkFence fence);
|
||||
VkImage swapchainImage, VkImageView swapchainImageView);
|
||||
void mapMemory(std::int64_t pid, std::uint64_t address, std::uint64_t size,
|
||||
int memoryType, int dmemIndex, int prot, std::int64_t offset);
|
||||
void registerBuffer(std::int64_t pid, bridge::CmdBuffer buffer);
|
||||
|
|
|
|||
|
|
@ -285,7 +285,8 @@ void amdgpu::draw(GraphicsPipe &pipe, int vmId, std::uint32_t firstVertex,
|
|||
renderTargetInfo.extent.height = vkViewPortScissor.extent.height;
|
||||
renderTargetInfo.extent.depth = 1;
|
||||
renderTargetInfo.dfmt = cbColor.info.dfmt;
|
||||
renderTargetInfo.nfmt = gnm::toNumericFormat(cbColor.info.nfmt, cbColor.info.dfmt);
|
||||
renderTargetInfo.nfmt =
|
||||
gnm::toNumericFormat(cbColor.info.nfmt, cbColor.info.dfmt);
|
||||
renderTargetInfo.mipCount = 1;
|
||||
renderTargetInfo.arrayLayerCount = 1;
|
||||
|
||||
|
|
@ -423,6 +424,7 @@ void amdgpu::draw(GraphicsPipe &pipe, int vmId, std::uint32_t firstVertex,
|
|||
|
||||
cacheTag.buildDescriptors(descriptorSets[0]);
|
||||
|
||||
pipe.scheduler.submit();
|
||||
pipe.scheduler.afterSubmit([cacheTag = std::move(cacheTag)] {});
|
||||
|
||||
auto commandBuffer = pipe.scheduler.getCommandBuffer();
|
||||
|
|
@ -479,11 +481,14 @@ void amdgpu::draw(GraphicsPipe &pipe, int vmId, std::uint32_t firstVertex,
|
|||
vkCmdSetStencilReference(commandBuffer, VK_STENCIL_FACE_FRONT_AND_BACK, 0);
|
||||
|
||||
VkCullModeFlags cullMode = VK_CULL_MODE_NONE;
|
||||
|
||||
if (pipe.uConfig.vgtPrimitiveType != gnm::PrimitiveType::RectList) {
|
||||
if (pipe.context.paSuScModeCntl.cullBack) {
|
||||
cullMode |= VK_CULL_MODE_BACK_BIT;
|
||||
}
|
||||
if (pipe.context.paSuScModeCntl.cullFront) {
|
||||
cullMode |= VK_CULL_MODE_FRONT_BIT;
|
||||
}
|
||||
}
|
||||
|
||||
vkCmdSetCullMode(commandBuffer, cullMode);
|
||||
|
|
@ -512,6 +517,7 @@ void amdgpu::draw(GraphicsPipe &pipe, int vmId, std::uint32_t firstVertex,
|
|||
|
||||
vkCmdEndRendering(commandBuffer);
|
||||
pipe.scheduler.submit();
|
||||
pipe.scheduler.wait();
|
||||
}
|
||||
|
||||
void amdgpu::dispatch(Cache &cache, Scheduler &sched,
|
||||
|
|
@ -530,14 +536,15 @@ void amdgpu::dispatch(Cache &cache, Scheduler &sched,
|
|||
pipelineLayout, 0, 1, &descriptorSet, 0, nullptr);
|
||||
vk::CmdBindShadersEXT(commandBuffer, 1, stages, &shader.handle);
|
||||
vkCmdDispatch(commandBuffer, groupCountX, groupCountY, groupCountZ);
|
||||
sched.afterSubmit([tag = std::move(tag)] {});
|
||||
sched.submit();
|
||||
sched.wait();
|
||||
}
|
||||
|
||||
void amdgpu::flip(Cache::Tag &cacheTag, VkCommandBuffer commandBuffer,
|
||||
VkExtent2D targetExtent, std::uint64_t address,
|
||||
VkImageView target, VkExtent2D imageExtent, FlipType type,
|
||||
TileMode tileMode, gnm::DataFormat dfmt,
|
||||
gnm::NumericFormat nfmt) {
|
||||
void amdgpu::flip(Cache::Tag &cacheTag, VkExtent2D targetExtent,
|
||||
std::uint64_t address, VkImageView target,
|
||||
VkExtent2D imageExtent, FlipType type, TileMode tileMode,
|
||||
gnm::DataFormat dfmt, gnm::NumericFormat nfmt) {
|
||||
ImageKey framebuffer{};
|
||||
framebuffer.readAddress = address;
|
||||
framebuffer.type = gnm::TextureType::Dim2D;
|
||||
|
|
@ -601,8 +608,7 @@ void amdgpu::flip(Cache::Tag &cacheTag, VkCommandBuffer commandBuffer,
|
|||
.pColorAttachments = colorAttachments,
|
||||
};
|
||||
|
||||
commandBuffer = cacheTag.getScheduler().getCommandBuffer();
|
||||
|
||||
auto commandBuffer = cacheTag.getScheduler().getCommandBuffer();
|
||||
vkCmdBeginRendering(commandBuffer, &renderInfo);
|
||||
|
||||
cacheTag.getDevice()->flipPipeline.bind(cacheTag.getScheduler(), type,
|
||||
|
|
@ -613,5 +619,4 @@ void amdgpu::flip(Cache::Tag &cacheTag, VkCommandBuffer commandBuffer,
|
|||
|
||||
vkCmdDraw(commandBuffer, 6, 1, 0, 0);
|
||||
vkCmdEndRendering(commandBuffer);
|
||||
cacheTag.getScheduler().submit();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -15,8 +15,7 @@ void dispatch(Cache &cache, Scheduler &sched,
|
|||
Registers::ComputeConfig &computeConfig,
|
||||
std::uint32_t groupCountX, std::uint32_t groupCountY,
|
||||
std::uint32_t groupCountZ);
|
||||
void flip(Cache::Tag &cacheTag, VkCommandBuffer commandBuffer,
|
||||
VkExtent2D targetExtent, std::uint64_t address, VkImageView target,
|
||||
VkExtent2D imageExtent, FlipType type, TileMode tileMode,
|
||||
gnm::DataFormat dfmt, gnm::NumericFormat nfmt);
|
||||
void flip(Cache::Tag &cacheTag, VkExtent2D targetExtent, std::uint64_t address,
|
||||
VkImageView target, VkExtent2D imageExtent, FlipType type,
|
||||
TileMode tileMode, gnm::DataFormat dfmt, gnm::NumericFormat nfmt);
|
||||
} // namespace amdgpu
|
||||
|
|
|
|||
|
|
@ -116,7 +116,7 @@ struct Environment {
|
|||
std::uint8_t numThreadX;
|
||||
std::uint8_t numThreadY;
|
||||
std::uint8_t numThreadZ;
|
||||
bool supportsBarycentric = true;
|
||||
bool supportsBarycentric = false;
|
||||
bool supportsInt8 = false;
|
||||
bool supportsInt64Atomics = false;
|
||||
bool supportsNonSemanticInfo = false;
|
||||
|
|
|
|||
|
|
@ -24,7 +24,7 @@
|
|||
|
||||
#define ClampInfToFltMax(x) (isinf(x) ? ((x) < 0 ? -FLT_MAX : FLT_MAX) : (x))
|
||||
#define ConvertInfToZero(x) (isinf(x) ? 0.0 : (x))
|
||||
#define Rsqrt(x) (1.0 / sqrt(x))
|
||||
#define Rsqrt(x) (inversesqrt(x))
|
||||
#define Rcp(x) (1.0 / x)
|
||||
|
||||
#define U32ARRAY_FETCH_BITS(ARRAY, START, BITCOUNT) ((ARRAY[(START) >> 5] >> ((START) & 31)) & ((1 << (BITCOUNT)) - 1))
|
||||
|
|
@ -577,10 +577,12 @@ void set_cond_thread_bit(inout uint64_t sdst, bool cond) {
|
|||
|
||||
void set_cond_thread_bit_exec(inout uint64_t sdst, bool cond) {
|
||||
uint64_t bit = uint64_t(1) << thread_id;
|
||||
if (cond && (exec & bit) != 0) {
|
||||
if (cond) {
|
||||
sdst |= bit;
|
||||
exec |= bit;
|
||||
} else {
|
||||
sdst &= ~bit;
|
||||
exec &= ~bit;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -995,6 +997,23 @@ void s_cmpk_le_u32(uint32_t a, uint32_t b) { scc = a <= b; }
|
|||
void s_cmpk_lt_u32(uint32_t a, uint32_t b) { scc = a < b; }
|
||||
void s_cmpk_lg_u32(uint32_t a, uint32_t b) { scc = a != b; }
|
||||
|
||||
// S_CMOVK_I32: conditional move of a constant.
// sdst receives `value` only when SCC is set; otherwise it is left untouched.
// sdst is declared `inout` because the store is conditional: with a plain
// `out` qualifier an unwritten parameter is undefined on return, and that
// undefined value would be copied back into the caller's destination.
void s_cmovk_i32(inout uint32_t sdst, uint32_t value) {
  if (scc) {
    sdst = value;
  }
}
|
||||
|
||||
// S_CMOV_B32: conditional 32-bit move.
// sdst receives `value` only when SCC is set; otherwise it is left untouched.
// sdst is declared `inout` because the store is conditional: with a plain
// `out` qualifier an unwritten parameter is undefined on return, and that
// undefined value would be copied back into the caller's destination.
void s_cmov_b32(inout uint32_t sdst, uint32_t value) {
  if (scc) {
    sdst = value;
  }
}
|
||||
|
||||
// S_CMOV_B64: conditional 64-bit move.
// sdst receives `value` only when SCC is set; otherwise it is left untouched.
// sdst is declared `inout` because the store is conditional: with a plain
// `out` qualifier an unwritten parameter is undefined on return, and that
// undefined value would be copied back into the caller's destination.
void s_cmov_b64(inout uint64_t sdst, uint64_t value) {
  if (scc) {
    sdst = value;
  }
}
|
||||
|
||||
uint32_t s_not_b32(uint32_t x) {
|
||||
uint32_t result = ~x;
|
||||
|
|
@ -1236,7 +1255,13 @@ int32_t s_ashr_i32(int32_t x, uint32_t y) { int32_t result = x >> (y & 0x1f); sc
|
|||
int64_t s_ashr_i64(int64_t x, uint32_t y) { int64_t result = x >> (y & 0x3f); scc = result != 0; return result; }
|
||||
uint32_t s_bfm_b32(uint32_t x, uint32_t y) { uint32_t result = ((1 << (x & 0x1f)) - 1) << (y & 0x1f); scc = result != 0; return result; }
|
||||
uint64_t s_bfm_b64(uint64_t x, uint64_t y) { uint64_t result = ((uint64_t(1) << (x & 0x1f)) - 1) << (y & 0x1f); scc = result != 0; return result; }
|
||||
int32_t s_mul_i32(int32_t x, int32_t y) { int32_t result = x * y; scc = result != 0; return result; }
|
||||
int32_t s_mul_i32(int32_t x, int32_t y) { return x * y; }
|
||||
int32_t s_mulk_i32(int32_t x, int32_t y) { return x * y; }
|
||||
// S_ABS_I32: returns the absolute value of x and updates SCC.
// SCC is set when the result is non-zero, which is the same convention the
// neighbouring scalar helpers (s_ashr_i32, s_ashr_i64, s_bfm_b32, s_bfm_b64)
// use; the previous `result == 0` test inverted the flag.
int32_t s_abs_i32(int32_t x) {
  int32_t result = abs(x);
  scc = result != 0;
  return result;
}
|
||||
uint32_t s_bfe_u32(uint32_t x, uint32_t y) {
|
||||
uint32_t offset = y & 0x1f;
|
||||
uint32_t width = (y >> 16) & 0x7f;
|
||||
|
|
@ -2168,10 +2193,10 @@ void s_dcache_inv() {
|
|||
|
||||
bool s_cbranch_scc0() { return scc == false; }
|
||||
bool s_cbranch_scc1() { return scc == true; }
|
||||
bool s_cbranch_vccz() { return vcc == 0; }
|
||||
bool s_cbranch_vccnz() { return vcc != 0; }
|
||||
bool s_cbranch_execz() { return exec == 0; }
|
||||
bool s_cbranch_execnz() { return exec != 0; }
|
||||
bool s_cbranch_vccz() { return (vcc & (uint64_t(1) << thread_id)) == 0; }
|
||||
bool s_cbranch_vccnz() { return (vcc & (uint64_t(1) << thread_id)) != 0; }
|
||||
bool s_cbranch_execz() { return (exec & (uint64_t(1) << thread_id)) == 0; }
|
||||
bool s_cbranch_execnz() { return (exec & (uint64_t(1) << thread_id)) != 0; }
|
||||
|
||||
|
||||
// DS
|
||||
|
|
|
|||
|
|
@ -1577,12 +1577,6 @@ static void createInitialValues(GcnConverter &converter,
|
|||
}
|
||||
}
|
||||
|
||||
for (std::int32_t i = 0; i < 3; ++i) {
|
||||
auto value = builder.createSpvCompositeExtract(loc, uintT,
|
||||
localInvocationId, {{i}});
|
||||
context.writeReg(loc, builder, gcn::RegId::Vgpr, i, value);
|
||||
}
|
||||
|
||||
auto workgroupSize = builder.createSpvCompositeConstruct(
|
||||
loc, uvec3T,
|
||||
{{context.imm32(env.numThreadX), context.imm32(env.numThreadY),
|
||||
|
|
@ -1590,12 +1584,19 @@ static void createInitialValues(GcnConverter &converter,
|
|||
auto workgroupSizeLocVar =
|
||||
converter.createLocalVariable(builder, loc, workgroupSize);
|
||||
|
||||
builder.createValue(loc, ir::amdgpu::CS_SET_INITIAL_EXEC,
|
||||
context.getTypeVoid(), localInvocationIdLocVar,
|
||||
workgroupSizeLocVar);
|
||||
builder.createValue(loc, ir::amdgpu::CS_SET_THREAD_ID,
|
||||
context.getTypeVoid(), localInvocationIdLocVar,
|
||||
workgroupSizeLocVar);
|
||||
|
||||
builder.createValue(loc, ir::amdgpu::CS_SET_INITIAL_EXEC,
|
||||
context.getTypeVoid(), localInvocationIdLocVar,
|
||||
workgroupSizeLocVar);
|
||||
|
||||
for (std::int32_t i = 0; i < 3; ++i) {
|
||||
auto value = builder.createSpvCompositeExtract(loc, uintT,
|
||||
localInvocationId, {{i}});
|
||||
context.writeReg(loc, builder, gcn::RegId::Vgpr, i, value);
|
||||
}
|
||||
}
|
||||
|
||||
context.writeReg(loc, builder, gcn::RegId::Vcc, 0, context.imm64(0));
|
||||
|
|
|
|||
|
|
@ -169,9 +169,6 @@ readSopkInst(GcnInstruction &inst, std::uint64_t &address,
|
|||
inst.addOperand(createSgprGcnOperand(address, sdst).withW());
|
||||
|
||||
inst.addOperand(GcnOperand::createConstant(static_cast<std::uint32_t>(simm)));
|
||||
if (op <= 16) {
|
||||
inst.addOperand(createImmediateGcnOperand(address));
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
@ -264,14 +261,10 @@ readVop3Inst(GcnInstruction &inst, std::uint64_t &address,
|
|||
auto omod = fetchMaskedValue(words[1], omodMask);
|
||||
auto neg = fetchMaskedValue(words[1], negMask);
|
||||
|
||||
if (op == ir::vop3::Op::MUL_HI_U32) {
|
||||
std::printf(".");
|
||||
}
|
||||
|
||||
inst.op = op;
|
||||
bool vop3b = isVop3b(op);
|
||||
|
||||
if (!vop3b) {
|
||||
if (vop3b) {
|
||||
abs = 0;
|
||||
clmp = false;
|
||||
}
|
||||
|
|
@ -291,19 +284,9 @@ readVop3Inst(GcnInstruction &inst, std::uint64_t &address,
|
|||
inst.addOperand(createSgprGcnOperand(address, sdst).withRW());
|
||||
}
|
||||
|
||||
bool writesVcc = op == ir::vop3::MAD_I64_I32 || op == ir::vop3::MAD_U64_U32 ||
|
||||
op == ir::vop3::MQSAD_U32_U8 ||
|
||||
op == ir::vop3::DIV_SCALE_F32 ||
|
||||
op == ir::vop3::DIV_SCALE_F64;
|
||||
bool readsVcc = op == ir::vop3::DIV_FMAS_F32 || op == ir::vop3::DIV_FMAS_F64;
|
||||
|
||||
bool usesSrc2 =
|
||||
op >= ir::vop3::MAD_LEGACY_F32 && op <= ir::vop3::DIV_FIXUP_F64;
|
||||
|
||||
if (writesVcc) {
|
||||
inst.addOperand(GcnOperand::createVccLo().withRW());
|
||||
}
|
||||
|
||||
inst.addOperand(createSgprGcnOperand(address, src0)
|
||||
.withR()
|
||||
.withAbs((abs & 1) != 0)
|
||||
|
|
@ -347,10 +330,6 @@ readVop3Inst(GcnInstruction &inst, std::uint64_t &address,
|
|||
.withNeg(((neg >> 2) & 1) != 0));
|
||||
}
|
||||
}
|
||||
|
||||
if (readsVcc) {
|
||||
inst.addOperand(GcnOperand::createVccLo().withR());
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
|
|||
|
|
@ -16,6 +16,7 @@ class Scheduler {
|
|||
unsigned mQueueFamily;
|
||||
vk::CommandPool mCommandPool;
|
||||
vk::CommandBuffer mCommandBuffer;
|
||||
bool mIsEmpty = false;
|
||||
|
||||
std::uint64_t mNextSignal = 1;
|
||||
std::mutex mTaskMutex;
|
||||
|
|
@ -40,9 +41,17 @@ public:
|
|||
|
||||
unsigned getQueueFamily() const { return mQueueFamily; }
|
||||
VkQueue getQueue() const { return mQueue; }
|
||||
VkCommandBuffer getCommandBuffer() const { return mCommandBuffer; }
|
||||
VkCommandBuffer getCommandBuffer() {
|
||||
mIsEmpty = false;
|
||||
return mCommandBuffer;
|
||||
}
|
||||
|
||||
Scheduler &submit() {
|
||||
if (mIsEmpty) {
|
||||
return *this;
|
||||
}
|
||||
mIsEmpty = true;
|
||||
|
||||
mCommandBuffer.end();
|
||||
|
||||
VkSemaphoreSubmitInfo waitSemSubmitInfo = {
|
||||
|
|
@ -56,7 +65,7 @@ public:
|
|||
.sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
|
||||
.semaphore = mSemaphore.getHandle(),
|
||||
.value = mNextSignal,
|
||||
.stageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
|
||||
.stageMask = VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT,
|
||||
};
|
||||
|
||||
VkCommandBufferSubmitInfo cmdBufferSubmitInfo{
|
||||
|
|
@ -96,11 +105,14 @@ public:
|
|||
return *this;
|
||||
}
|
||||
|
||||
auto afterSubmit = std::move(mAfterSubmitTasks);
|
||||
mAfterSubmitTasks.clear();
|
||||
|
||||
wait();
|
||||
|
||||
while (!mAfterSubmitTasks.empty()) {
|
||||
auto task = std::move(mAfterSubmitTasks.back());
|
||||
mAfterSubmitTasks.pop_back();
|
||||
while (!afterSubmit.empty()) {
|
||||
auto task = std::move(afterSubmit.back());
|
||||
afterSubmit.pop_back();
|
||||
std::move(task)();
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -42,7 +42,6 @@ struct Context {
|
|||
VkFormat swapchainColorFormat = VK_FORMAT_B8G8R8A8_UNORM;
|
||||
VkColorSpaceKHR swapchainColorSpace = VK_COLOR_SPACE_SRGB_NONLINEAR_KHR;
|
||||
std::vector<VkImageView> swapchainImageViews;
|
||||
std::vector<VkFence> inFlightFences;
|
||||
VkSemaphore presentCompleteSemaphore = VK_NULL_HANDLE;
|
||||
VkSemaphore renderCompleteSemaphore = VK_NULL_HANDLE;
|
||||
VkPhysicalDeviceDescriptorBufferPropertiesEXT descriptorBufferProps;
|
||||
|
|
@ -69,10 +68,6 @@ struct Context {
|
|||
vkDestroySwapchainKHR(device, swapchain, allocator);
|
||||
}
|
||||
|
||||
for (auto fence : inFlightFences) {
|
||||
vkDestroyFence(device, fence, allocator);
|
||||
}
|
||||
|
||||
if (presentCompleteSemaphore != VK_NULL_HANDLE) {
|
||||
vkDestroySemaphore(device, presentCompleteSemaphore, allocator);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -198,15 +198,6 @@ void vk::Context::createSwapchain() {
|
|||
|
||||
recreateSwapchain();
|
||||
|
||||
inFlightFences.resize(swapchainImages.size());
|
||||
|
||||
for (auto &fence : inFlightFences) {
|
||||
VkFenceCreateInfo fenceInfo{};
|
||||
fenceInfo.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
|
||||
fenceInfo.flags = VK_FENCE_CREATE_SIGNALED_BIT;
|
||||
|
||||
VK_VERIFY(vkCreateFence(device, &fenceInfo, allocator, &fence));
|
||||
}
|
||||
{
|
||||
VkSemaphoreCreateInfo semaphoreCreateInfo{};
|
||||
semaphoreCreateInfo.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO;
|
||||
|
|
@ -422,6 +413,10 @@ void vk::Context::createDevice(VkSurfaceKHR surface, int gpuIndex,
|
|||
storage_8bit.uniformAndStorageBuffer8BitAccess && float16_int8.shaderInt8;
|
||||
supportsInt64Atomics = phyDevFeatures12.shaderBufferInt64Atomics;
|
||||
|
||||
if (!fsBarycentric.fragmentShaderBarycentric) {
|
||||
shaderObject.pNext = fsBarycentric.pNext;
|
||||
}
|
||||
|
||||
rx::dieIf(!storage_16bit.uniformAndStorageBuffer16BitAccess,
|
||||
"16-bit storage is unsupported by this GPU");
|
||||
rx::dieIf(!float16_int8.shaderFloat16,
|
||||
|
|
|
|||
|
|
@ -18,8 +18,6 @@
|
|||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <filesystem>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <print>
|
||||
#include <span>
|
||||
#include <thread>
|
||||
|
|
@ -39,20 +37,6 @@
|
|||
|
||||
#include "Device.hpp"
|
||||
|
||||
// Dumps a pixel buffer to `name` as a binary PPM ("P6") image.
//
// name   - path of the file to create or overwrite.
// data   - width * height pixels, each occupying sizeof(unsigned int) bytes;
//          only the first three bytes of every pixel are written.
//          NOTE(review): presumably 8-bit RGBX/RGBA pixel data - confirm with
//          the caller.
// width  - image width in pixels, written to the PPM header.
// height - image height in pixels, written to the PPM header.
//
// Stream errors are not reported; a file that cannot be opened results in no
// output.
static void saveImage(const char *name, const void *data, std::uint32_t width,
                      std::uint32_t height) {
  std::ofstream file(name, std::ios::out | std::ios::binary);

  file << "P6\n" << width << "\n" << height << "\n" << 255 << "\n";

  // Walk the buffer as bytes: this keeps `data` const and avoids the C-style
  // casts, while stepping one whole source pixel per iteration.
  constexpr auto pixelStride = sizeof(unsigned int);
  auto pixel = static_cast<const char *>(data);

  for (std::uint32_t y = 0; y < height; ++y) {
    for (std::uint32_t x = 0; x < width; ++x) {
      file.write(pixel, 3); // drop the fourth byte of each pixel
      pixel += pixelStride;
    }
  }
}
|
||||
|
||||
void transitionImageLayout(VkCommandBuffer commandBuffer, VkImage image,
|
||||
VkImageLayout oldLayout, VkImageLayout newLayout,
|
||||
const VkImageSubresourceRange &subresourceRange) {
|
||||
|
|
@ -118,17 +102,6 @@ void transitionImageLayout(VkCommandBuffer commandBuffer, VkImage image,
|
|||
});
|
||||
}
|
||||
|
||||
// Submits a single command buffer to `queue` and blocks until the queue has
// finished all submitted work.
//
// queue     - queue the command buffer is submitted to.
// cmdBuffer - command buffer to execute.
//
// Both Vulkan calls are checked with VK_VERIFY.
static void submit(VkQueue queue, VkCommandBuffer cmdBuffer) {
  VkSubmitInfo submitInfo{
      .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
      .commandBufferCount = 1,
      .pCommandBuffers = &cmdBuffer,
  };

  // No fence is attached (VK_NULL_HANDLE rather than nullptr, since VkFence is
  // a non-dispatchable handle); completion is observed by the wait below.
  VK_VERIFY(vkQueueSubmit(queue, 1, &submitInfo, VK_NULL_HANDLE));
  VK_VERIFY(vkQueueWaitIdle(queue));
}
|
||||
|
||||
static void usage(std::FILE *out, const char *argv0) {
|
||||
std::fprintf(out, "usage: %s [options...]\n", argv0);
|
||||
std::fprintf(out, " options:\n");
|
||||
|
|
@ -374,12 +347,6 @@ int main(int argc, const char *argv[]) {
|
|||
VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT);
|
||||
|
||||
vkContext.createSwapchain();
|
||||
std::vector<vk::CommandBuffer> presentCmdBuffers(
|
||||
vkContext.swapchainImages.size());
|
||||
|
||||
for (auto &cmdBuffer : presentCmdBuffers) {
|
||||
cmdBuffer = commandPool.createPrimaryBuffer({});
|
||||
}
|
||||
|
||||
amdgpu::bridge::BridgePuller bridgePuller{bridge};
|
||||
amdgpu::bridge::Command commandsBuffer[1];
|
||||
|
|
@ -597,14 +564,11 @@ int main(int argc, const char *argv[]) {
|
|||
|
||||
case amdgpu::bridge::CommandId::Flip: {
|
||||
if (!isImageAcquired) {
|
||||
vkWaitForFences(vkContext.device, 1,
|
||||
&vkContext.inFlightFences[imageIndex], VK_TRUE,
|
||||
UINT64_MAX);
|
||||
|
||||
while (true) {
|
||||
auto acquireNextImageResult = vkAcquireNextImageKHR(
|
||||
vkContext.device, vkContext.swapchain, UINT64_MAX,
|
||||
vkContext.presentCompleteSemaphore, nullptr, &imageIndex);
|
||||
vkContext.presentCompleteSemaphore, VK_NULL_HANDLE,
|
||||
&imageIndex);
|
||||
if (acquireNextImageResult == VK_ERROR_OUT_OF_DATE_KHR) {
|
||||
vkContext.recreateSwapchain();
|
||||
continue;
|
||||
|
|
@ -613,18 +577,11 @@ int main(int argc, const char *argv[]) {
|
|||
VK_VERIFY(acquireNextImageResult);
|
||||
break;
|
||||
}
|
||||
|
||||
vkResetFences(vkContext.device, 1,
|
||||
&vkContext.inFlightFences[imageIndex]);
|
||||
}
|
||||
|
||||
vkResetCommandBuffer(presentCmdBuffers[imageIndex], 0);
|
||||
|
||||
if (!device.flip(cmd.flip.pid, cmd.flip.bufferIndex, cmd.flip.arg,
|
||||
presentCmdBuffers[imageIndex],
|
||||
vkContext.swapchainImages[imageIndex],
|
||||
vkContext.swapchainImageViews[imageIndex],
|
||||
vkContext.inFlightFences[imageIndex])) {
|
||||
vkContext.swapchainImageViews[imageIndex])) {
|
||||
isImageAcquired = true;
|
||||
break;
|
||||
}
|
||||
|
|
@ -641,6 +598,8 @@ int main(int argc, const char *argv[]) {
|
|||
auto vkQueuePresentResult =
|
||||
vkQueuePresentKHR(vkContext.presentQueue, &presentInfo);
|
||||
|
||||
isImageAcquired = false;
|
||||
|
||||
if (vkQueuePresentResult == VK_ERROR_OUT_OF_DATE_KHR) {
|
||||
vkContext.recreateSwapchain();
|
||||
} else {
|
||||
|
|
@ -678,4 +637,6 @@ int main(int argc, const char *argv[]) {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
vkDeviceWaitIdle(vk::context->device);
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in a new issue