rpcsx-gpu: initial cache implementation

This commit is contained in:
DH 2024-10-09 05:14:46 +03:00
parent 28e1b544e6
commit dd2ed74ff8
8 changed files with 984 additions and 389 deletions

File diff suppressed because it is too large Load diff

View file

@ -3,10 +3,12 @@
#include "Pipe.hpp"
#include "amdgpu/tiler.hpp"
#include "gnm/constants.hpp"
#include "rx/AddressRange.hpp"
#include "shader/Access.hpp"
#include "shader/Evaluator.hpp"
#include "shader/GcnConverter.hpp"
#include <algorithm>
#include <map>
#include <memory>
#include <print>
#include <rx/ConcurrentBitPool.hpp>
@ -33,7 +35,6 @@ struct ImageKey {
gnm::DataFormat dfmt;
gnm::NumericFormat nfmt;
TileMode tileMode = {};
VkOffset3D offset = {};
VkExtent3D extent = {1, 1, 1};
std::uint32_t pitch = 1;
unsigned baseMipLevel = 0;
@ -69,6 +70,16 @@ struct SamplerKey {
};
struct Cache {
// Kinds of entries tracked by the cache. getTable() casts an EntryType to an
// index into mTables, so the enumerators must stay contiguous starting at 0.
enum class EntryType {
HostVisibleBuffer,
DeviceLocalBuffer,
IndexBuffer,
Image,
Shader,
// Not a real entry kind: number of kinds above, used as the mTables array
// length.
Count
};
static constexpr std::array kGraphicsStages = {
VK_SHADER_STAGE_VERTEX_BIT,
VK_SHADER_STAGE_GEOMETRY_BIT,
@ -115,8 +126,6 @@ struct Cache {
enum class TagId : std::uint64_t {};
struct Entry;
int vmId = -1;
struct Shader {
VkShaderEXT handle = VK_NULL_HANDLE;
shader::gcn::ShaderInfo *info;
@ -145,6 +154,7 @@ struct Cache {
// Value type returned by Tag::getImage(): a Vulkan image handle bundled with
// its format and subresource range.
struct Image {
VkImage handle = VK_NULL_HANDLE;
// NOTE(review): presumably the cache entry backing this image; ownership and
// lifetime are not visible here - confirm before dereferencing.
Entry *entry;
VkFormat format;
VkImageSubresourceRange subresource;
};
@ -152,6 +162,7 @@ struct Cache {
// Value type returned by Tag::getImageView(): a Vulkan image view handle
// together with the image handle, format and subresource range.
struct ImageView {
VkImageView handle = VK_NULL_HANDLE;
// NOTE(review): presumably the VkImage the view was created from; it has no
// default initializer, unlike `handle` - confirm every producer sets it.
VkImage imageHandle;
VkFormat format;
VkImageSubresourceRange subresource;
};
@ -211,7 +222,8 @@ private:
template <typename T> T readPointer(std::uint64_t address) {
T result{};
cacheTag->readMemory(&result, address, sizeof(result));
cacheTag->readMemory(
&result, rx::AddressRange::fromBeginSize(address, sizeof(result)));
return result;
}
@ -227,7 +239,9 @@ private:
std::uint32_t resourceSlot;
};
std::vector<std::shared_ptr<Entry>> mAcquiredResources;
std::vector<std::shared_ptr<Entry>> mAcquiredImageResources;
std::vector<std::shared_ptr<Entry>> mAcquiredMemoryResources;
std::vector<std::shared_ptr<Entry>> mAcquiredViewResources;
std::vector<MemoryTableConfigSlot> memoryTableConfigSlots;
std::vector<std::uint32_t *> descriptorBuffers;
ShaderResources shaderResources;
@ -236,7 +250,8 @@ private:
TagStorage(const TagStorage &) = delete;
void clear() {
mAcquiredResources.clear();
mAcquiredImageResources.clear();
mAcquiredMemoryResources.clear();
memoryTableConfigSlots.clear();
descriptorBuffers.clear();
shaderResources.clear();
@ -247,6 +262,7 @@ private:
TagStorage *mStorage = nullptr;
Scheduler *mScheduler = nullptr;
Cache *mParent = nullptr;
std::unique_lock<std::mutex> mResourcesLock;
TagId mTagId{};
std::uint32_t mAcquiredMemoryTable = -1;
};
@ -275,33 +291,33 @@ public:
Cache *getCache() const { return mParent; }
Device *getDevice() const { return mParent->mDevice; }
Scheduler &getScheduler() const { return *mScheduler; }
int getVmId() const { return mParent->mVmIm; }
int getVmId() const { return mParent->mVmId; }
Buffer getInternalHostVisibleBuffer(std::uint64_t size);
Buffer getInternalDeviceLocalBuffer(std::uint64_t size);
void unlock() { mResourcesLock.unlock(); }
void buildDescriptors(VkDescriptorSet descriptorSet);
Sampler getSampler(const SamplerKey &key);
Buffer getBuffer(std::uint64_t address, std::uint64_t size, Access access);
Buffer getBuffer(rx::AddressRange range, Access access);
IndexBuffer getIndexBuffer(std::uint64_t address, std::uint32_t offset,
std::uint32_t indexCount,
gnm::PrimitiveType primType,
gnm::IndexType indexType);
Image getImage(const ImageKey &key, Access access);
ImageView getImageView(const ImageKey &key, Access access);
void readMemory(void *target, std::uint64_t address, std::uint64_t size);
void writeMemory(const void *source, std::uint64_t address,
std::uint64_t size);
int compareMemory(const void *source, std::uint64_t address,
std::uint64_t size);
void readMemory(void *target, rx::AddressRange range);
void writeMemory(const void *source, rx::AddressRange range);
int compareMemory(const void *source, rx::AddressRange range);
void release();
VkPipelineLayout getGraphicsPipelineLayout() const {
[[nodiscard]] VkPipelineLayout getGraphicsPipelineLayout() const {
return getCache()->getGraphicsPipelineLayout();
}
VkPipelineLayout getComputePipelineLayout() const {
[[nodiscard]] VkPipelineLayout getComputePipelineLayout() const {
return getCache()->getComputePipelineLayout();
}
@ -423,6 +439,9 @@ private:
result.mParent = this;
result.mScheduler = &scheduler;
std::unique_lock<std::mutex> lock(mResourcesMtx);
result.mResourcesLock = std::move(lock);
return result;
}
@ -457,23 +476,37 @@ public:
flush(scheduler, 0, ~static_cast<std::uint64_t>(0));
}
VkPipelineLayout getGraphicsPipelineLayout() const {
[[nodiscard]] VkPipelineLayout getGraphicsPipelineLayout() const {
return mGraphicsPipelineLayout;
}
VkPipelineLayout getComputePipelineLayout() const {
[[nodiscard]] VkPipelineLayout getComputePipelineLayout() const {
return mComputePipelineLayout;
}
auto &getGraphicsDescriptorSetLayouts() const {
[[nodiscard]] auto &getGraphicsDescriptorSetLayouts() const {
return mGraphicsDescriptorSetLayouts;
}
void trackUpdate(EntryType type, rx::AddressRange range,
std::shared_ptr<Entry> entry, TagId tagId,
bool watchChanges);
void trackWrite(rx::AddressRange range, TagId tagId, bool lockMemory);
/// Reports whether `range` is covered by a single sync-table area tagged
/// with `expTagId`.
///
/// The area is looked up by the first address of `range`; the result is true
/// only if such an area exists, it contains the whole of `range`, and its
/// stored tag equals `expTagId`.
[[nodiscard]] bool isInSync(rx::AddressRange range, TagId expTagId) {
  auto areaIt = mSyncTable.queryArea(range.beginAddress());

  if (areaIt != mSyncTable.end() && areaIt.range().contains(range)) {
    return areaIt.get() == expTagId;
  }

  // No area at the begin address, or the area does not span the full range.
  return false;
}
/// Returns a reference to the entry table that corresponds to `type`.
/// The enumerator value is used directly as the index into mTables.
auto &getTable(EntryType type) {
  const auto tableIndex = static_cast<int>(type);
  return mTables[tableIndex];
}
private:
TagId getSyncTag(std::uint64_t address, std::uint64_t size, TagId currentTag);
std::shared_ptr<Entry> getInSyncEntry(EntryType type, rx::AddressRange range);
Device *mDevice;
int mVmIm;
int mVmId;
std::atomic<TagId> mNextTagId{TagId{2}};
vk::Buffer mGdsBuffer;
@ -502,12 +535,10 @@ private:
std::map<SamplerKey, VkSampler> mSamplers;
std::shared_ptr<Entry> mFrameBuffers[10];
std::mutex mResourcesMtx;
rx::MemoryTableWithPayload<std::shared_ptr<Entry>> mBuffers;
rx::MemoryTableWithPayload<std::shared_ptr<Entry>> mIndexBuffers;
rx::MemoryTableWithPayload<std::shared_ptr<Entry>> mImages;
rx::MemoryTableWithPayload<std::shared_ptr<Entry>> mShaders;
rx::MemoryTableWithPayload<std::shared_ptr<Entry>> mSyncTable;
rx::MemoryTableWithPayload<std::shared_ptr<Entry>>
mTables[static_cast<std::size_t>(EntryType::Count)];
rx::MemoryTableWithPayload<TagId> mSyncTable;
};
} // namespace amdgpu

View file

@ -11,6 +11,7 @@
#include "shaders/rdna-semantic-spirv.hpp"
#include "vk.hpp"
#include <fcntl.h>
#include <print>
#include <sys/mman.h>
using namespace amdgpu;
@ -33,10 +34,6 @@ Device::Device() {
rx::die("failed to deserialize builtin semantics\n");
}
for (int index = 0; auto &cache : caches) {
cache.vmId = index++;
}
for (auto &pipe : graphicsPipes) {
pipe.device = this;
}
@ -73,7 +70,7 @@ void Device::mapProcess(std::int64_t pid, int vmId, const char *shmName) {
process.vmFd = memoryFd;
if (memoryFd < 0) {
std::printf("failed to process %x shared memory\n", (int)pid);
std::println("failed to process {:x} shared memory", (int)pid);
std::abort();
}
@ -95,7 +92,7 @@ void Device::mapProcess(std::int64_t pid, int vmId, const char *shmName) {
gpuProt, MAP_FIXED | MAP_SHARED, mapFd, devOffset);
if (mmapResult == MAP_FAILED) {
std::printf("failed to map process %x memory, address %lx-%lx, type %x\n",
std::println("failed to map process {:x} memory, address {:x}-{:x}, type {:x}",
(int)pid, startAddress, endAddress, slot.memoryType);
std::abort();
}

View file

@ -454,9 +454,12 @@ void amdgpu::draw(GraphicsPipe &pipe, int vmId, std::uint32_t firstVertex,
vsPrimType = pipe.uConfig.vgtPrimitiveType.value;
}
auto indexOffset =
indexBuffer.handle == VK_NULL_HANDLE ? indexBuffer.offset : 0;
vertexShader = cacheTag.getVertexShader(
gcn::Stage::VsVs, pipe.sh.spiShaderPgmVs, pipe.context,
indexBuffer.offset, vsPrimType, viewPorts);
gcn::Stage::VsVs, pipe.sh.spiShaderPgmVs, pipe.context, indexOffset,
vsPrimType, viewPorts);
}
auto pixelShader =

View file

@ -217,7 +217,7 @@ void vk::Context::recreateSwapchain() {
&surfCaps));
uint32_t presentModeCount;
VK_VERIFY(vkGetPhysicalDeviceSurfacePresentModesKHR(physicalDevice, surface,
&presentModeCount, NULL));
&presentModeCount, nullptr));
std::vector<VkPresentModeKHR> presentModes(presentModeCount);
VK_VERIFY(vkGetPhysicalDeviceSurfacePresentModesKHR(

View file

@ -1,6 +1,7 @@
#include "vk.hpp"
#include <amdgpu/bridge/bridge.hpp>
#include <print>
#include <rx/MemoryTable.hpp>
#include <rx/atScopeExit.hpp>
#include <rx/die.hpp>
@ -103,22 +104,22 @@ void transitionImageLayout(VkCommandBuffer commandBuffer, VkImage image,
}
static void usage(std::FILE *out, const char *argv0) {
std::fprintf(out, "usage: %s [options...]\n", argv0);
std::fprintf(out, " options:\n");
std::fprintf(out, " --version, -v - print version\n");
std::fprintf(out,
" --cmd-bridge <name> - setup command queue bridge name\n");
std::fprintf(out, " --shm <name> - setup shared memory name\n");
std::fprintf(
std::println(out, "usage: {} [options...]", argv0);
std::println(out, " options:");
std::println(out, " --version, -v - print version");
std::println(out,
" --cmd-bridge <name> - setup command queue bridge name");
std::println(out, " --shm <name> - setup shared memory name");
std::println(
out,
" --gpu <index> - specify physical gpu index to use, default is 0\n");
std::fprintf(out,
" --presenter <presenter mode> - set flip engine target\n");
std::fprintf(out, " --validate - enable validation layers\n");
std::fprintf(out, " -h, --help - show this message\n");
std::fprintf(out, "\n");
std::fprintf(out, " presenter mode:\n");
std::fprintf(out, " window - create and use native window (default)\n");
" --gpu <index> - specify physical gpu index to use, default is 0");
std::println(out,
" --presenter <presenter mode> - set flip engine target");
std::println(out, " --validate - enable validation layers");
std::println(out, " -h, --help - show this message");
std::println(out, "");
std::println(out, " presenter mode:");
std::println(out, " window - create and use native window (default)");
}
static VKAPI_ATTR VkBool32 VKAPI_CALL debugUtilsMessageCallback(
@ -340,7 +341,7 @@ int main(int argc, const char *argv[]) {
vk::getHostVisibleMemory().initHostVisible(
std::min(hostVisibleMemoryTotalSize / 2, 1ul * 1024 * 1024 * 1024));
vk::getDeviceLocalMemory().initDeviceLocal(
std::min(localMemoryTotalSize / 2, 4ul * 1024 * 1024 * 1024));
std::min(localMemoryTotalSize / 4, 4ul * 1024 * 1024 * 1024));
auto commandPool =
vk::CommandPool::Create(vkContext.presentQueueFamily,
@ -398,7 +399,7 @@ int main(int argc, const char *argv[]) {
if (gpIndex > GLFW_JOYSTICK_LAST) {
for (int i = 0; i <= GLFW_JOYSTICK_LAST; ++i) {
if (glfwJoystickIsGamepad(i) == GLFW_TRUE) {
std::printf("Gamepad \"%s\" activated", glfwGetGamepadName(i));
std::print("Gamepad \"{}\" activated", glfwGetGamepadName(i));
gpIndex = i;
break;
}

View file

@ -0,0 +1,68 @@
#pragma once
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
namespace rx {
/// Half-open address interval `[beginAddress, endAddress)`.
///
/// A default-constructed range has begin = max uint64 and end = 0. It is
/// therefore invalid, and it acts as the neutral element of merge():
/// `AddressRange{}.merge(x)` yields `x`.
class AddressRange {
  std::uint64_t mBeginAddress = ~std::uint64_t{0};
  std::uint64_t mEndAddress = 0;

public:
  constexpr AddressRange() = default;

  /// Builds `[begin, begin + size)`. The sum is not checked for wrap-around.
  [[nodiscard]] static constexpr AddressRange
  fromBeginSize(std::uint64_t begin, std::uint64_t size) {
    return fromBeginEnd(begin, begin + size);
  }

  /// Builds `[begin, end)`. No ordering check is performed; a range with
  /// `begin >= end` is simply reported as invalid by isValid().
  [[nodiscard]] static constexpr AddressRange fromBeginEnd(std::uint64_t begin,
                                                           std::uint64_t end) {
    AddressRange result;
    result.mBeginAddress = begin;
    result.mEndAddress = end;
    return result;
  }

  /// True when the range covers at least one address (begin < end).
  [[nodiscard]] constexpr bool isValid() const {
    return mBeginAddress < mEndAddress;
  }

  constexpr explicit operator bool() const { return isValid(); }

  /// True when both ranges share at least one address. Ranges that merely
  /// touch (one ends where the other begins) do not intersect.
  [[nodiscard]] constexpr bool intersects(AddressRange other) const {
    return mBeginAddress < other.mEndAddress &&
           mEndAddress > other.mBeginAddress;
  }

  /// True when `other` lies entirely inside this range (bounds may coincide).
  [[nodiscard]] constexpr bool contains(AddressRange other) const {
    return mBeginAddress <= other.mBeginAddress &&
           mEndAddress >= other.mEndAddress;
  }

  /// True when `address` is in `[begin, end)`; the end address is excluded.
  [[nodiscard]] constexpr bool contains(std::uint64_t address) const {
    return address >= mBeginAddress && address < mEndAddress;
  }

  /// Smallest range spanning both operands (min of begins, max of ends).
  /// Any gap between the two operands becomes part of the result.
  [[nodiscard]] constexpr AddressRange merge(AddressRange other) const {
    return fromBeginEnd(std::min(mBeginAddress, other.mBeginAddress),
                        std::max(mEndAddress, other.mEndAddress));
  }

  /// Overlapping part of both operands (max of begins, min of ends).
  /// For disjoint operands the result is an invalid range.
  [[nodiscard]] constexpr AddressRange intersection(AddressRange other) const {
    return fromBeginEnd(std::max(mBeginAddress, other.mBeginAddress),
                        std::min(mEndAddress, other.mEndAddress));
  }

  /// Number of addresses covered by the range.
  ///
  /// Invalid ranges (default-constructed, or the intersection of disjoint
  /// ranges) report 0. Subtracting unconditionally would wrap around in
  /// unsigned arithmetic, e.g. the default range would claim a size of 1.
  [[nodiscard]] constexpr std::size_t size() const {
    return isValid() ? mEndAddress - mBeginAddress : 0;
  }

  // NOTE(review): the accessors return std::size_t while the members are
  // std::uint64_t; the types only match where size_t is 64 bits wide.
  [[nodiscard]] constexpr std::size_t beginAddress() const {
    return mBeginAddress;
  }
  [[nodiscard]] constexpr std::size_t endAddress() const { return mEndAddress; }

  constexpr bool operator==(const AddressRange &) const = default;
};
} // namespace rx

View file

@ -1,5 +1,6 @@
#pragma once
#include "rx/AddressRange.hpp"
#include <cassert>
#include <cstdint>
#include <map>
@ -232,6 +233,10 @@ public:
return {it->first, std::next(it)->first, it->second.second};
}
/// Returns the area this iterator refers to as an rx::AddressRange spanning
/// from beginAddress() to endAddress().
rx::AddressRange range() const {
  const auto areaBegin = beginAddress();
  const auto areaEnd = endAddress();
  return rx::AddressRange::fromBeginEnd(areaBegin, areaEnd);
}
std::uint64_t beginAddress() const { return it->first; }
std::uint64_t endAddress() const { return std::next(it)->first; }
std::uint64_t size() const { return endAddress() - beginAddress(); }
@ -307,7 +312,7 @@ public:
}
iterator map(std::uint64_t beginAddress, std::uint64_t endAddress,
PayloadT payload, bool merge = true) {
PayloadT payload, bool merge = true, bool noOverride = false) {
assert(beginAddress < endAddress);
auto [beginIt, beginInserted] =
mAreas.emplace(beginAddress, std::pair{Kind::O, payload});
@ -318,6 +323,10 @@ public:
bool endCollision = false;
bool lastRemovedIsOpen = false;
PayloadT lastRemovedOpenPayload;
if (noOverride && !beginInserted && !endInserted &&
std::next(beginIt) == endIt) {
return beginIt;
}
if (!beginInserted || !endInserted) {
if (!beginInserted) {