mirror of
https://github.com/xenia-project/xenia.git
synced 2025-12-06 07:12:03 +01:00
295 lines
12 KiB
C++
295 lines
12 KiB
C++
/**
|
|
******************************************************************************
|
|
* Xenia : Xbox 360 Emulator Research Project *
|
|
******************************************************************************
|
|
* Copyright 2018 Ben Vanik. All rights reserved. *
|
|
* Released under the BSD license - see LICENSE in the root for more details. *
|
|
******************************************************************************
|
|
*/
|
|
|
|
#ifndef XENIA_GPU_D3D12_SHARED_MEMORY_H_
|
|
#define XENIA_GPU_D3D12_SHARED_MEMORY_H_
|
|
|
|
#include <memory>
|
|
#include <mutex>
|
|
#include <utility>
|
|
#include <vector>
|
|
|
|
#include "xenia/base/mutex.h"
|
|
#include "xenia/gpu/trace_writer.h"
|
|
#include "xenia/memory.h"
|
|
#include "xenia/ui/d3d12/d3d12_api.h"
|
|
#include "xenia/ui/d3d12/pools.h"
|
|
|
|
namespace xe {
|
|
namespace gpu {
|
|
namespace d3d12 {
|
|
|
|
class D3D12CommandProcessor;
|
|
|
|
// Manages memory for unconverted textures, resolve targets, vertex and index
|
|
// buffers that can be accessed from shaders with Xenon physical addresses, with
|
|
// system page size granularity.
|
|
class SharedMemory {
|
|
public:
|
|
SharedMemory(D3D12CommandProcessor* command_processor, Memory* memory,
|
|
TraceWriter* trace_writer);
|
|
~SharedMemory();
|
|
|
|
bool Initialize();
|
|
void Shutdown();
|
|
|
|
ID3D12Resource* GetBuffer() const { return buffer_; }
|
|
D3D12_GPU_VIRTUAL_ADDRESS GetGPUAddress() const {
|
|
return buffer_gpu_address_;
|
|
}
|
|
|
|
void BeginFrame();
|
|
|
|
typedef void (*GlobalWatchCallback)(void* context, uint32_t address_first,
|
|
uint32_t address_last,
|
|
bool invalidated_by_gpu);
|
|
typedef void* GlobalWatchHandle;
|
|
// Registers a callback invoked when something is invalidated in the GPU
|
|
// memory copy by the CPU or (if triggered explicitly - such as by a resolve)
|
|
// by the GPU. It will be fired for writes to pages previously requested, but
|
|
// may also be fired regardless of whether it was used by GPU emulation - for
|
|
// example, if the game changes protection level of a memory range containing
|
|
// the watched range.
|
|
//
|
|
// The callback is called with the mutex locked.
|
|
GlobalWatchHandle RegisterGlobalWatch(GlobalWatchCallback callback,
|
|
void* callback_context);
|
|
void UnregisterGlobalWatch(GlobalWatchHandle handle);
|
|
typedef void (*WatchCallback)(void* context, void* data, uint64_t argument,
|
|
bool invalidated_by_gpu);
|
|
typedef void* WatchHandle;
|
|
// Registers a callback invoked when the specified memory range is invalidated
|
|
// in the GPU memory copy by the CPU or (if triggered explicitly - such as by
|
|
// a resolve) by the GPU. It will be fired for writes to pages previously
|
|
// requested, but may also be fired regardless of whether it was used by GPU
|
|
// emulation - for example, if the game changes protection level of a memory
|
|
// range containing the watched range.
|
|
//
|
|
// Generally the context is the subsystem pointer (for example, the texture
|
|
// cache), the data is the object (such as a texture), and the argument is
|
|
// additional subsystem/object-specific data (such as whether the range
|
|
// belongs to the base mip level or to the rest of the mips).
|
|
//
|
|
// Called with the global critical region locked. Do NOT watch or unwatch
|
|
// ranges from within it! The watch for the callback is cancelled after the
|
|
// callback - the handle becomes invalid.
|
|
WatchHandle WatchMemoryRange(uint32_t start, uint32_t length,
|
|
WatchCallback callback, void* callback_context,
|
|
void* callback_data, uint64_t callback_argument);
|
|
// Unregisters previously registered watched memory range.
|
|
void UnwatchMemoryRange(WatchHandle handle);
|
|
// Locks the mutex that gets locked when watch callbacks are invoked - must be
|
|
// done when checking variables that may be changed by a watch callback.
|
|
inline std::unique_lock<std::recursive_mutex> LockWatchMutex() {
|
|
return global_critical_region_.Acquire();
|
|
}
|
|
|
|
// Ensures the buffer tiles backing the range are resident, but doesn't upload
|
|
// anything.
|
|
bool MakeTilesResident(uint32_t start, uint32_t length);
|
|
|
|
// Checks if the range has been updated, uploads new data if needed and
|
|
// ensures the buffer tiles backing the range are resident. May transition the
|
|
// tiled buffer to copy destination - call this before UseForReading or
|
|
// UseForWriting. Returns true if the range has been fully updated and is
|
|
// usable.
|
|
bool RequestRange(uint32_t start, uint32_t length);
|
|
|
|
// Marks the range and, if not exact_range, potentially its surroundings
|
|
// (to up to the first GPU-written page, as an access violation exception
|
|
// count optimization) as modified by the CPU, also invalidating GPU-written
|
|
// pages directly in the range.
|
|
std::pair<uint32_t, uint32_t> MemoryWriteCallback(
|
|
uint32_t physical_address_start, uint32_t length, bool exact_range);
|
|
|
|
// Marks the range as containing GPU-generated data (such as resolves),
|
|
// triggering modification callbacks, making it valid (so pages are not
|
|
// copied from the main memory until they're modified by the CPU) and
|
|
// protecting it.
|
|
void RangeWrittenByGPU(uint32_t start, uint32_t length);
|
|
|
|
// Makes the buffer usable for vertices, indices and texture untiling.
|
|
inline void UseForReading() {
|
|
// Vertex fetch is also allowed in pixel shaders.
|
|
TransitionBuffer(D3D12_RESOURCE_STATE_INDEX_BUFFER |
|
|
D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE |
|
|
D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE);
|
|
}
|
|
// Makes the buffer usable for texture tiling after a resolve.
|
|
inline void UseForWriting() {
|
|
TransitionBuffer(D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
|
|
}
|
|
// Makes the buffer usable as a source for copy commands.
|
|
inline void UseAsCopySource() {
|
|
TransitionBuffer(D3D12_RESOURCE_STATE_COPY_SOURCE);
|
|
}
|
|
|
|
void WriteRawSRVDescriptor(D3D12_CPU_DESCRIPTOR_HANDLE handle);
|
|
void WriteRawUAVDescriptor(D3D12_CPU_DESCRIPTOR_HANDLE handle);
|
|
|
|
// Returns true if any downloads were submitted to the command processor.
|
|
bool InitializeTraceSubmitDownloads();
|
|
void InitializeTraceCompleteDownloads();
|
|
|
|
private:
|
|
bool AreTiledResourcesUsed() const;
|
|
|
|
// Mark the memory range as updated and protect it.
|
|
void MakeRangeValid(uint32_t valid_page_first, uint32_t valid_page_count,
|
|
bool written_by_gpu);
|
|
|
|
D3D12CommandProcessor* command_processor_;
|
|
Memory* memory_;
|
|
TraceWriter* trace_writer_;
|
|
|
|
// The 512 MB tiled buffer.
|
|
static constexpr uint32_t kBufferSizeLog2 = 29;
|
|
static constexpr uint32_t kBufferSize = 1 << kBufferSizeLog2;
|
|
ID3D12Resource* buffer_ = nullptr;
|
|
D3D12_GPU_VIRTUAL_ADDRESS buffer_gpu_address_ = 0;
|
|
D3D12_RESOURCE_STATES buffer_state_ = D3D12_RESOURCE_STATE_COPY_DEST;
|
|
|
|
// Heaps are 4 MB, so not too many of them are allocated, but also not to
|
|
// waste too much memory for padding (with 16 MB there's too much).
|
|
static constexpr uint32_t kHeapSizeLog2 = 22;
|
|
static constexpr uint32_t kHeapSize = 1 << kHeapSizeLog2;
|
|
static_assert((kHeapSize % D3D12_TILED_RESOURCE_TILE_SIZE_IN_BYTES) == 0,
|
|
"Heap size must be a multiple of Direct3D tile size");
|
|
// Resident portions of the tiled buffer.
|
|
ID3D12Heap* heaps_[kBufferSize >> kHeapSizeLog2] = {};
|
|
// Number of the heaps currently resident, for profiling.
|
|
uint32_t heap_count_ = 0;
|
|
// Whether creation of a heap has failed in the current frame.
|
|
bool heap_creation_failed_ = false;
|
|
|
|
// Log2 of system page size.
|
|
uint32_t page_size_log2_;
|
|
// Total physical page count.
|
|
uint32_t page_count_;
|
|
|
|
// Non-shader-visible buffer descriptor heap for faster binding (via copying
|
|
// rather than creation).
|
|
enum class BufferDescriptorIndex : uint32_t {
|
|
kRawSRV,
|
|
kRawUAV,
|
|
|
|
kCount,
|
|
};
|
|
ID3D12DescriptorHeap* buffer_descriptor_heap_ = nullptr;
|
|
D3D12_CPU_DESCRIPTOR_HANDLE buffer_descriptor_heap_start_;
|
|
|
|
// Handle of the physical memory write callback.
|
|
void* physical_write_watch_handle_ = nullptr;
|
|
|
|
// Mutex between the exception handler and the command processor, to be locked
|
|
// when checking or updating validity of pages/ranges.
|
|
xe::global_critical_region global_critical_region_;
|
|
|
|
// ***************************************************************************
|
|
// Things below should be protected by global_critical_region.
|
|
// ***************************************************************************
|
|
|
|
// Bit vector containing:
|
|
// - Even block indices - whether physical memory system pages are up to date.
|
|
// - Odd block indices - whether phyical memory system pages contain data
|
|
// written by the GPU not synchronized with the CPU (subset of valid pages).
|
|
std::vector<uint64_t> valid_and_gpu_written_pages_;
|
|
|
|
static std::pair<uint32_t, uint32_t> MemoryWriteCallbackThunk(
|
|
void* context_ptr, uint32_t physical_address_start, uint32_t length,
|
|
bool exact_range);
|
|
|
|
struct GlobalWatch {
|
|
GlobalWatchCallback callback;
|
|
void* callback_context;
|
|
};
|
|
std::vector<GlobalWatch*> global_watches_;
|
|
struct WatchNode;
|
|
// Watched range placed by other GPU subsystems.
|
|
struct WatchRange {
|
|
union {
|
|
struct {
|
|
WatchCallback callback;
|
|
void* callback_context;
|
|
void* callback_data;
|
|
uint64_t callback_argument;
|
|
WatchNode* node_first;
|
|
uint32_t page_first;
|
|
uint32_t page_last;
|
|
};
|
|
WatchRange* next_free;
|
|
};
|
|
};
|
|
// Node for faster checking of watches when pages have been written to - all
|
|
// 512 MB are split into smaller equally sized buckets, and then ranges are
|
|
// linearly checked.
|
|
struct WatchNode {
|
|
union {
|
|
struct {
|
|
WatchRange* range;
|
|
// Link to another node of this watched range in the next bucket.
|
|
WatchNode* range_node_next;
|
|
// Links to nodes belonging to other watched ranges in the bucket.
|
|
WatchNode* bucket_node_previous;
|
|
WatchNode* bucket_node_next;
|
|
};
|
|
WatchNode* next_free;
|
|
};
|
|
};
|
|
static constexpr uint32_t kWatchBucketSizeLog2 = 22;
|
|
static constexpr uint32_t kWatchBucketCount =
|
|
1 << (kBufferSizeLog2 - kWatchBucketSizeLog2);
|
|
WatchNode* watch_buckets_[kWatchBucketCount] = {};
|
|
// Allocation from pools - taking new WatchRanges and WatchNodes from the free
|
|
// list, and if there are none, creating a pool if the current one is fully
|
|
// used, and linearly allocating from the current pool.
|
|
static constexpr uint32_t kWatchRangePoolSize = 8192;
|
|
static constexpr uint32_t kWatchNodePoolSize = 8192;
|
|
std::vector<WatchRange*> watch_range_pools_;
|
|
std::vector<WatchNode*> watch_node_pools_;
|
|
uint32_t watch_range_current_pool_allocated_ = 0;
|
|
uint32_t watch_node_current_pool_allocated_ = 0;
|
|
WatchRange* watch_range_first_free_ = nullptr;
|
|
WatchNode* watch_node_first_free_ = nullptr;
|
|
// Triggers the watches (global and per-range), removing triggered range
|
|
// watches.
|
|
void FireWatches(uint32_t page_first, uint32_t page_last,
|
|
bool invalidated_by_gpu);
|
|
// Unlinks and frees the range and its nodes. Call this with the mutex locked.
|
|
void UnlinkWatchRange(WatchRange* range);
|
|
|
|
// ***************************************************************************
|
|
// Things above should be protected by global_critical_region.
|
|
// ***************************************************************************
|
|
|
|
// First page and length in pages.
|
|
typedef std::pair<uint32_t, uint32_t> UploadRange;
|
|
// Ranges that need to be uploaded, generated by GetRangesToUpload (a
|
|
// persistently allocated vector).
|
|
std::vector<UploadRange> upload_ranges_;
|
|
void GetRangesToUpload(uint32_t request_page_first,
|
|
uint32_t request_page_last);
|
|
std::unique_ptr<ui::d3d12::UploadBufferPool> upload_buffer_pool_ = nullptr;
|
|
|
|
void TransitionBuffer(D3D12_RESOURCE_STATES new_state);
|
|
|
|
// GPU-written memory downloading for traces.
|
|
// Start page, length in pages.
|
|
std::vector<std::pair<uint32_t, uint32_t>> trace_gpu_written_ranges_;
|
|
// Created temporarily, only for downloading.
|
|
ID3D12Resource* trace_gpu_written_buffer_ = nullptr;
|
|
void ResetTraceGPUWrittenBuffer();
|
|
};
|
|
|
|
} // namespace d3d12
|
|
} // namespace gpu
|
|
} // namespace xe
|
|
|
|
#endif // XENIA_GPU_D3D12_SHARED_MEMORY_H_
|