rpcsx-gpu: initial cache implementation

2026-03-06 21:44:02 +01:00 · 2024-10-09 05:14:46 +03:00 · 2024-10-09 05:14:46 +03:00 · dd2ed74ff8
parent 28e1b544e6
commit dd2ed74ff8
8 changed files with 984 additions and 389 deletions
--- a/rpcsx-gpu/Cache.cpp
+++ b/rpcsx-gpu/Cache.cpp
--- a/rpcsx-gpu/Cache.hpp
+++ b/rpcsx-gpu/Cache.hpp
@ -3,10 +3,12 @@
 #include "Pipe.hpp"
 #include "amdgpu/tiler.hpp"
 #include "gnm/constants.hpp"
+#include "rx/AddressRange.hpp"
 #include "shader/Access.hpp"
 #include "shader/Evaluator.hpp"
 #include "shader/GcnConverter.hpp"
 #include <algorithm>
+#include <map>
 #include <memory>
 #include <print>
 #include <rx/ConcurrentBitPool.hpp>
@ -33,7 +35,6 @@ struct ImageKey {
  gnm::DataFormat dfmt;
  gnm::NumericFormat nfmt;
  TileMode tileMode = {};
-  VkOffset3D offset = {};
  VkExtent3D extent = {1, 1, 1};
  std::uint32_t pitch = 1;
  unsigned baseMipLevel = 0;
@ -69,6 +70,16 @@ struct SamplerKey {
 };

 struct Cache {
+  enum class EntryType { // kind of cached entry; used by getTable() as an index into mTables
+    HostVisibleBuffer,
+    DeviceLocalBuffer,
+    IndexBuffer,
+    Image,
+    Shader,
+
+    Count // not an entry kind: number of enumerators above, used as the size of mTables, so it must stay last
+  };
+
  static constexpr std::array kGraphicsStages = {
      VK_SHADER_STAGE_VERTEX_BIT,
      VK_SHADER_STAGE_GEOMETRY_BIT,
@ -115,8 +126,6 @@ struct Cache {
  enum class TagId : std::uint64_t {};
  struct Entry;

-  int vmId = -1;
-
  struct Shader {
    VkShaderEXT handle = VK_NULL_HANDLE;
    shader::gcn::ShaderInfo *info;
@ -145,6 +154,7 @@ struct Cache {

  struct Image {
    VkImage handle = VK_NULL_HANDLE;
+    Entry *entry;
    VkFormat format;
    VkImageSubresourceRange subresource;
  };
@ -152,6 +162,7 @@ struct Cache {
  struct ImageView {
    VkImageView handle = VK_NULL_HANDLE;
    VkImage imageHandle;
+    VkFormat format;
    VkImageSubresourceRange subresource;
  };

@ -211,7 +222,8 @@ private:

    template <typename T> T readPointer(std::uint64_t address) {
      T result{};
-      cacheTag->readMemory(&result, address, sizeof(result));
+      cacheTag->readMemory(
+          &result, rx::AddressRange::fromBeginSize(address, sizeof(result)));
      return result;
    }

@ -227,7 +239,9 @@ private:
      std::uint32_t resourceSlot;
    };

-    std::vector<std::shared_ptr<Entry>> mAcquiredResources;
+    std::vector<std::shared_ptr<Entry>> mAcquiredImageResources;
+    std::vector<std::shared_ptr<Entry>> mAcquiredMemoryResources;
+    std::vector<std::shared_ptr<Entry>> mAcquiredViewResources;
    std::vector<MemoryTableConfigSlot> memoryTableConfigSlots;
    std::vector<std::uint32_t *> descriptorBuffers;
    ShaderResources shaderResources;
@ -236,7 +250,8 @@ private:
    TagStorage(const TagStorage &) = delete;

    void clear() {
-      mAcquiredResources.clear();
+      mAcquiredImageResources.clear();
+      mAcquiredMemoryResources.clear();
      memoryTableConfigSlots.clear();
      descriptorBuffers.clear();
      shaderResources.clear();
@ -247,6 +262,7 @@ private:
    TagStorage *mStorage = nullptr;
    Scheduler *mScheduler = nullptr;
    Cache *mParent = nullptr;
+    std::unique_lock<std::mutex> mResourcesLock;
    TagId mTagId{};
    std::uint32_t mAcquiredMemoryTable = -1;
  };
@ -275,33 +291,33 @@ public:
    Cache *getCache() const { return mParent; }
    Device *getDevice() const { return mParent->mDevice; }
    Scheduler &getScheduler() const { return *mScheduler; }
-    int getVmId() const { return mParent->mVmIm; }
+    int getVmId() const { return mParent->mVmId; }

    Buffer getInternalHostVisibleBuffer(std::uint64_t size);
    Buffer getInternalDeviceLocalBuffer(std::uint64_t size);

+    void unlock() { mResourcesLock.unlock(); } // releases mResourcesLock early; std::unique_lock::unlock throws std::system_error if the lock is not currently owned, so call at most once per acquired lock
+
    void buildDescriptors(VkDescriptorSet descriptorSet);

    Sampler getSampler(const SamplerKey &key);
-    Buffer getBuffer(std::uint64_t address, std::uint64_t size, Access access);
+    Buffer getBuffer(rx::AddressRange range, Access access);
    IndexBuffer getIndexBuffer(std::uint64_t address, std::uint32_t offset,
                               std::uint32_t indexCount,
                               gnm::PrimitiveType primType,
                               gnm::IndexType indexType);
    Image getImage(const ImageKey &key, Access access);
    ImageView getImageView(const ImageKey &key, Access access);
-    void readMemory(void *target, std::uint64_t address, std::uint64_t size);
-    void writeMemory(const void *source, std::uint64_t address,
-                     std::uint64_t size);
-    int compareMemory(const void *source, std::uint64_t address,
-                      std::uint64_t size);
+    void readMemory(void *target, rx::AddressRange range);
+    void writeMemory(const void *source, rx::AddressRange range);
+    int compareMemory(const void *source, rx::AddressRange range);
    void release();

-    VkPipelineLayout getGraphicsPipelineLayout() const {
+    [[nodiscard]] VkPipelineLayout getGraphicsPipelineLayout() const {
      return getCache()->getGraphicsPipelineLayout();
    }

-    VkPipelineLayout getComputePipelineLayout() const {
+    [[nodiscard]] VkPipelineLayout getComputePipelineLayout() const {
      return getCache()->getComputePipelineLayout();
    }

@ -423,6 +439,9 @@ private:
    result.mParent = this;
    result.mScheduler = &scheduler;

+    std::unique_lock<std::mutex> lock(mResourcesMtx);
+    result.mResourcesLock = std::move(lock);
+
    return result;
  }

@ -457,23 +476,37 @@ public:
    flush(scheduler, 0, ~static_cast<std::uint64_t>(0));
  }

-  VkPipelineLayout getGraphicsPipelineLayout() const {
+  [[nodiscard]] VkPipelineLayout getGraphicsPipelineLayout() const {
    return mGraphicsPipelineLayout;
  }

-  VkPipelineLayout getComputePipelineLayout() const {
+  [[nodiscard]] VkPipelineLayout getComputePipelineLayout() const {
    return mComputePipelineLayout;
  }

-  auto &getGraphicsDescriptorSetLayouts() const {
+  [[nodiscard]] auto &getGraphicsDescriptorSetLayouts() const {
    return mGraphicsDescriptorSetLayouts;
  }

+  void trackUpdate(EntryType type, rx::AddressRange range,
+                   std::shared_ptr<Entry> entry, TagId tagId,
+                   bool watchChanges);
+
+  void trackWrite(rx::AddressRange range, TagId tagId, bool lockMemory);
+
+  [[nodiscard]] bool isInSync(rx::AddressRange range, TagId expTagId) { // true only when a single sync-table area covers all of `range` and carries expTagId
+    auto syncIt = mSyncTable.queryArea(range.beginAddress()); // lookup is keyed by the first address of `range`; presumably yields the area holding it, or end() - confirm in rx/MemoryTable.hpp
+    return syncIt != mSyncTable.end() && syncIt.range().contains(range) && // the area found must span the whole range, not only its start
+           syncIt.get() == expTagId; // payload type of mSyncTable is TagId
+  }
+
+  auto &getTable(EntryType type) { return mTables[static_cast<int>(type)]; } // no bounds check: mTables has EntryType::Count elements, so Count itself is not a valid argument
+
 private:
-  TagId getSyncTag(std::uint64_t address, std::uint64_t size, TagId currentTag);
+  std::shared_ptr<Entry> getInSyncEntry(EntryType type, rx::AddressRange range);

  Device *mDevice;
-  int mVmIm;
+  int mVmId;
  std::atomic<TagId> mNextTagId{TagId{2}};
  vk::Buffer mGdsBuffer;

@ -502,12 +535,10 @@ private:
  std::map<SamplerKey, VkSampler> mSamplers;

  std::shared_ptr<Entry> mFrameBuffers[10];
+  std::mutex mResourcesMtx;

-  rx::MemoryTableWithPayload<std::shared_ptr<Entry>> mBuffers;
-  rx::MemoryTableWithPayload<std::shared_ptr<Entry>> mIndexBuffers;
-  rx::MemoryTableWithPayload<std::shared_ptr<Entry>> mImages;
-  rx::MemoryTableWithPayload<std::shared_ptr<Entry>> mShaders;
-
-  rx::MemoryTableWithPayload<std::shared_ptr<Entry>> mSyncTable;
+  rx::MemoryTableWithPayload<std::shared_ptr<Entry>>
+      mTables[static_cast<std::size_t>(EntryType::Count)];
+  rx::MemoryTableWithPayload<TagId> mSyncTable;
 };
 } // namespace amdgpu
--- a/rpcsx-gpu/Device.cpp
+++ b/rpcsx-gpu/Device.cpp
@ -11,6 +11,7 @@
 #include "shaders/rdna-semantic-spirv.hpp"
 #include "vk.hpp"
 #include <fcntl.h>
+#include <print>
 #include <sys/mman.h>

 using namespace amdgpu;
@ -33,10 +34,6 @@ Device::Device() {
    rx::die("failed to deserialize builtin semantics\n");
  }

-  for (int index = 0; auto &cache : caches) {
-    cache.vmId = index++;
-  }
-
  for (auto &pipe : graphicsPipes) {
    pipe.device = this;
  }
@ -73,7 +70,7 @@ void Device::mapProcess(std::int64_t pid, int vmId, const char *shmName) {
  process.vmFd = memoryFd;

  if (memoryFd < 0) {
-    std::printf("failed to process %x shared memory\n", (int)pid);
+    std::println("failed to process {:x} shared memory", (int)pid);
    std::abort();
  }

@ -95,7 +92,7 @@ void Device::mapProcess(std::int64_t pid, int vmId, const char *shmName) {
               gpuProt, MAP_FIXED | MAP_SHARED, mapFd, devOffset);

    if (mmapResult == MAP_FAILED) {
-      std::printf("failed to map process %x memory, address %lx-%lx, type %x\n",
+      std::println("failed to map process {:x} memory, address {:x}-{:x}, type {:x}",
                  (int)pid, startAddress, endAddress, slot.memoryType);
      std::abort();
    }
--- a/rpcsx-gpu/Renderer.cpp
+++ b/rpcsx-gpu/Renderer.cpp
@ -454,9 +454,12 @@ void amdgpu::draw(GraphicsPipe &pipe, int vmId, std::uint32_t firstVertex,
      vsPrimType = pipe.uConfig.vgtPrimitiveType.value;
    }

+    auto indexOffset =
+        indexBuffer.handle == VK_NULL_HANDLE ? indexBuffer.offset : 0;
+
    vertexShader = cacheTag.getVertexShader(
-        gcn::Stage::VsVs, pipe.sh.spiShaderPgmVs, pipe.context,
-        indexBuffer.offset, vsPrimType, viewPorts);
+        gcn::Stage::VsVs, pipe.sh.spiShaderPgmVs, pipe.context, indexOffset,
+        vsPrimType, viewPorts);
  }

  auto pixelShader =
--- a/rpcsx-gpu/lib/vk/src/vk.cpp
+++ b/rpcsx-gpu/lib/vk/src/vk.cpp
@ -217,7 +217,7 @@ void vk::Context::recreateSwapchain() {
                                                      &surfCaps));
  uint32_t presentModeCount;
  VK_VERIFY(vkGetPhysicalDeviceSurfacePresentModesKHR(physicalDevice, surface,
-                                                      &presentModeCount, NULL));
+                                                      &presentModeCount, nullptr));

  std::vector<VkPresentModeKHR> presentModes(presentModeCount);
  VK_VERIFY(vkGetPhysicalDeviceSurfacePresentModesKHR(
--- a/rpcsx-gpu/main.cpp
+++ b/rpcsx-gpu/main.cpp
@ -1,6 +1,7 @@
 #include "vk.hpp"

 #include <amdgpu/bridge/bridge.hpp>
+#include <print>
 #include <rx/MemoryTable.hpp>
 #include <rx/atScopeExit.hpp>
 #include <rx/die.hpp>
@ -103,22 +104,22 @@ void transitionImageLayout(VkCommandBuffer commandBuffer, VkImage image,
 }

 static void usage(std::FILE *out, const char *argv0) {
-  std::fprintf(out, "usage: %s [options...]\n", argv0);
-  std::fprintf(out, "  options:\n");
-  std::fprintf(out, "  --version, -v - print version\n");
-  std::fprintf(out,
-               "    --cmd-bridge <name> - setup command queue bridge name\n");
-  std::fprintf(out, "    --shm <name> - setup shared memory name\n");
-  std::fprintf(
+  std::println(out, "usage: {} [options...]", argv0);
+  std::println(out, "  options:");
+  std::println(out, "  --version, -v - print version");
+  std::println(out,
+               "    --cmd-bridge <name> - setup command queue bridge name");
+  std::println(out, "    --shm <name> - setup shared memory name");
+  std::println(
      out,
-      "    --gpu <index> - specify physical gpu index to use, default is 0\n");
-  std::fprintf(out,
-               "    --presenter <presenter mode> - set flip engine target\n");
-  std::fprintf(out, "    --validate - enable validation layers\n");
-  std::fprintf(out, "    -h, --help - show this message\n");
-  std::fprintf(out, "\n");
-  std::fprintf(out, "  presenter mode:\n");
-  std::fprintf(out, "     window - create and use native window (default)\n");
+      "    --gpu <index> - specify physical gpu index to use, default is 0");
+  std::println(out,
+               "    --presenter <presenter mode> - set flip engine target");
+  std::println(out, "    --validate - enable validation layers");
+  std::println(out, "    -h, --help - show this message");
+  std::println(out, "");
+  std::println(out, "  presenter mode:");
+  std::println(out, "     window - create and use native window (default)");
 }

 static VKAPI_ATTR VkBool32 VKAPI_CALL debugUtilsMessageCallback(
@ -340,7 +341,7 @@ int main(int argc, const char *argv[]) {
  vk::getHostVisibleMemory().initHostVisible(
      std::min(hostVisibleMemoryTotalSize / 2, 1ul * 1024 * 1024 * 1024));
  vk::getDeviceLocalMemory().initDeviceLocal(
-      std::min(localMemoryTotalSize / 2, 4ul * 1024 * 1024 * 1024));
+      std::min(localMemoryTotalSize / 4, 4ul * 1024 * 1024 * 1024));

  auto commandPool =
      vk::CommandPool::Create(vkContext.presentQueueFamily,
@ -398,7 +399,7 @@ int main(int argc, const char *argv[]) {
    if (gpIndex > GLFW_JOYSTICK_LAST) {
      for (int i = 0; i <= GLFW_JOYSTICK_LAST; ++i) {
        if (glfwJoystickIsGamepad(i) == GLFW_TRUE) {
-          std::printf("Gamepad \"%s\" activated", glfwGetGamepadName(i));
+          std::print("Gamepad \"{}\" activated", glfwGetGamepadName(i));
          gpIndex = i;
          break;
        }
--- a/rx/include/rx/AddressRange.hpp
+++ b/rx/include/rx/AddressRange.hpp
@ -0,0 +1,92 @@
+#pragma once
+
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+
+namespace rx {
+/// Half-open range [beginAddress, endAddress) of 64-bit addresses.
+///
+/// A range is valid only while beginAddress < endAddress. A default-constructed
+/// range is invalid and leaves the other operand of merge() unchanged.
+class AddressRange {
+  // Inverted on purpose: min()/max() in merge() then return the other
+  // operand's bounds.
+  std::uint64_t mBeginAddress = ~std::uint64_t{0};
+  std::uint64_t mEndAddress = 0;
+
+public:
+  constexpr AddressRange() = default;
+
+  /// Builds [begin, begin + size). The sum is not checked for wrap-around.
+  [[nodiscard]] static constexpr AddressRange
+  fromBeginSize(std::uint64_t begin, std::uint64_t size) {
+    return fromBeginEnd(begin, begin + size);
+  }
+
+  /// Builds [begin, end) without validating the bounds; see isValid().
+  [[nodiscard]] static constexpr AddressRange fromBeginEnd(std::uint64_t begin,
+                                                           std::uint64_t end) {
+    AddressRange result;
+    result.mBeginAddress = begin;
+    result.mEndAddress = end;
+    return result;
+  }
+
+  /// True when the range covers at least one address.
+  [[nodiscard]] constexpr bool isValid() const {
+    return mBeginAddress < mEndAddress;
+  }
+  constexpr explicit operator bool() const { return isValid(); }
+
+  /// True when the two ranges overlap. Bounds are compared as stored; neither
+  /// range is checked with isValid().
+  [[nodiscard]] constexpr bool intersects(AddressRange other) const {
+    return mBeginAddress < other.mEndAddress &&
+           mEndAddress > other.mBeginAddress;
+  }
+
+  /// True when `other` starts and ends within this range (bounds inclusive).
+  [[nodiscard]] constexpr bool contains(AddressRange other) const {
+    return mBeginAddress <= other.mBeginAddress &&
+           mEndAddress >= other.mEndAddress;
+  }
+
+  /// True when `address` lies in [beginAddress, endAddress).
+  [[nodiscard]] constexpr bool contains(std::uint64_t address) const {
+    return address >= mBeginAddress && address < mEndAddress;
+  }
+
+  /// Smallest range spanning both operands, including any gap between them.
+  [[nodiscard]] constexpr AddressRange merge(AddressRange other) const {
+    return fromBeginEnd(std::min(mBeginAddress, other.mBeginAddress),
+                        std::max(mEndAddress, other.mEndAddress));
+  }
+
+  /// Overlap of the two ranges. The result is invalid (isValid() == false)
+  /// when they share no address.
+  [[nodiscard]] constexpr AddressRange intersection(AddressRange other) const {
+    return fromBeginEnd(std::max(mBeginAddress, other.mBeginAddress),
+                        std::min(mEndAddress, other.mEndAddress));
+  }
+
+  /// Number of addresses covered, or 0 for an invalid range. Without the guard
+  /// an inverted range (default-constructed, or the intersection() of disjoint
+  /// ranges) would report a wrapped-around size.
+  [[nodiscard]] constexpr std::uint64_t size() const {
+    return isValid() ? mEndAddress - mBeginAddress : 0;
+  }
+
+  // Accessors return std::uint64_t, matching the stored fields, so addresses
+  // are never narrowed where std::size_t is 32 bits wide.
+  [[nodiscard]] constexpr std::uint64_t beginAddress() const {
+    return mBeginAddress;
+  }
+  [[nodiscard]] constexpr std::uint64_t endAddress() const {
+    return mEndAddress;
+  }
+
+  constexpr bool operator==(const AddressRange &) const = default;
+};
+} // namespace rx
--- a/rx/include/rx/MemoryTable.hpp
+++ b/rx/include/rx/MemoryTable.hpp
@ -1,5 +1,6 @@
 #pragma once

+#include "rx/AddressRange.hpp"
 #include <cassert>
 #include <cstdint>
 #include <map>
@ -232,6 +233,10 @@ public:
      return {it->first, std::next(it)->first, it->second.second};
    }

+    rx::AddressRange range() const { // this area's [beginAddress(), endAddress()) span packaged as an rx::AddressRange
+      return rx::AddressRange::fromBeginEnd(beginAddress(), endAddress()); // endAddress() reads std::next(it), so the iterator must have a following map entry
+    }
+
    std::uint64_t beginAddress() const { return it->first; }
    std::uint64_t endAddress() const { return std::next(it)->first; }
    std::uint64_t size() const { return endAddress() - beginAddress(); }
@ -307,7 +312,7 @@ public:
  }

  iterator map(std::uint64_t beginAddress, std::uint64_t endAddress,
-               PayloadT payload, bool merge = true) {
+               PayloadT payload, bool merge = true, bool noOverride = false) {
    assert(beginAddress < endAddress);
    auto [beginIt, beginInserted] =
        mAreas.emplace(beginAddress, std::pair{Kind::O, payload});
@ -318,6 +323,10 @@ public:
    bool endCollision = false;
    bool lastRemovedIsOpen = false;
    PayloadT lastRemovedOpenPayload;
+    if (noOverride && !beginInserted && !endInserted &&
+        std::next(beginIt) == endIt) {
+      return beginIt;
+    }

    if (!beginInserted || !endInserted) {
      if (!beginInserted) {