From bbabbc243965658e7f44cf4dadf7d9e3617c7ec7 Mon Sep 17 00:00:00 2001
From: Triang3l
Date: Wed, 25 Jul 2018 15:04:14 +0300
Subject: [PATCH] [D3D12] SHM - mark pages as modified in the beginning of a frame

---
Review notes (text between the "---" separator above and the first
"diff --git" line is not part of the commit message):

* The line structure of this patch was reconstructed from a copy in which
  all line breaks had been collapsed. Hunk line counts agree with every "@@"
  header and with the diffstat below, but the leading whitespace of context
  lines was rebuilt assuming 2-space indentation. Verify against the
  pre-image, or apply with "git apply --ignore-whitespace".
* Text damaged in that copy was restored: the "&region_start_coordinates" and
  "&region_size" arguments, "#include <mutex>",
  "std::make_unique<SharedMemory>" and the "std::vector<uint64_t>" members.
* SharedMemory::BeginFrame: the added line referenced "watches_triggered_l1",
  which is declared nowhere in this patch. It now uses the member
  "watches_triggered_l1_".
* SharedMemory::Initialize: the unchanged context line uses "page_in_sync_",
  while the member is named "pages_in_sync_". It is left untouched here
  because context lines must match the pre-image; it needs a follow-up fix.
* The From: header carries no e-mail address; add one before using "git am".

 .../gpu/d3d12/d3d12_command_processor.cc      |  4 +-
 src/xenia/gpu/d3d12/shared_memory.cc          | 54 ++++++++++++++++---
 src/xenia/gpu/d3d12/shared_memory.h           | 41 +++++++++++---
 3 files changed, 85 insertions(+), 14 deletions(-)

diff --git a/src/xenia/gpu/d3d12/d3d12_command_processor.cc b/src/xenia/gpu/d3d12/d3d12_command_processor.cc
index d26627229..1943507d3 100644
--- a/src/xenia/gpu/d3d12/d3d12_command_processor.cc
+++ b/src/xenia/gpu/d3d12/d3d12_command_processor.cc
@@ -51,7 +51,7 @@ bool D3D12CommandProcessor::SetupContext() {
     }
   }
 
-  shared_memory_ = std::make_unique<SharedMemory>(context);
+  shared_memory_ = std::make_unique<SharedMemory>(memory_, context);
   if (!shared_memory_->Initialize()) {
     XELOGE("Failed to initialize shared memory");
     return false;
@@ -164,6 +164,8 @@ bool D3D12CommandProcessor::BeginFrame() {
   command_lists_setup_[current_queue_frame_]->BeginRecording();
   command_lists_[current_queue_frame_]->BeginRecording();
 
+  shared_memory_->BeginFrame();
+
   return true;
 }
 
diff --git a/src/xenia/gpu/d3d12/shared_memory.cc b/src/xenia/gpu/d3d12/shared_memory.cc
index be69bfb1b..bdc161802 100644
--- a/src/xenia/gpu/d3d12/shared_memory.cc
+++ b/src/xenia/gpu/d3d12/shared_memory.cc
@@ -19,10 +19,17 @@ namespace xe {
 namespace gpu {
 namespace d3d12 {
 
-SharedMemory::SharedMemory(ui::d3d12::D3D12Context* context)
-    : context_(context) {
+SharedMemory::SharedMemory(Memory* memory, ui::d3d12::D3D12Context* context)
+    : memory_(memory), context_(context) {
   page_size_log2_ = xe::math::log2_ceil(xe::memory::page_size());
-  pages_in_sync_.resize(kBufferSize >> page_size_log2_ >> 6);
+  page_count_ = kBufferSize >> page_size_log2_;
+  uint32_t page_bitmap_length = page_count_ >> 6;
+
+  pages_in_sync_.resize(page_bitmap_length);
+
+  watched_pages_.resize(page_bitmap_length);
+  watches_triggered_l1_.resize(page_bitmap_length);
+  watches_triggered_l2_.resize(page_bitmap_length >> 6);
 }
 
 SharedMemory::~SharedMemory() { Shutdown(); }
@@ -51,10 +58,16 @@ bool SharedMemory::Initialize() {
   }
 
   std::memset(heaps_, 0, sizeof(heaps_));
+  heap_creation_failed_ = false;
 
   std::memset(pages_in_sync_.data(), 0,
               page_in_sync_.size() * sizeof(uint64_t));
 
+  std::memset(watched_pages_.data(), 0,
+              watched_pages_.size() * sizeof(uint64_t));
+  std::memset(watches_triggered_l2_.data(), 0,
+              watches_triggered_l2_.size() * sizeof(uint64_t));
+
   return true;
 }
 
@@ -73,7 +86,25 @@ void SharedMemory::Shutdown() {
   }
 }
 
-bool SharedMemory::EnsureRangeAllocated(uint32_t start, uint32_t length) {
+void SharedMemory::BeginFrame() {
+  // Check triggered watches, clear them and mark modified pages as out of date.
+  watch_mutex_.lock();
+  for (uint32_t i = 0; i < watches_triggered_l2_.size(); ++i) {
+    uint64_t bits_l2 = watches_triggered_l2_[i];
+    uint32_t index_l2;
+    while (xe::bit_scan_forward(bits_l2, &index_l2)) {
+      bits_l2 &= ~(1ull << index_l2);
+      uint32_t index_l1 = (i << 6) + index_l2;
+      pages_in_sync_[index_l1] &= ~(watches_triggered_l1_[index_l1]);
+    }
+    watches_triggered_l2_[i] = 0;
+  }
+  watch_mutex_.unlock();
+
+  heap_creation_failed_ = false;
+}
+
+bool SharedMemory::UseRange(uint32_t start, uint32_t length) {
   if (length == 0) {
     // Some texture is empty, for example - safe to draw in this case.
     return true;
@@ -83,14 +114,19 @@ bool SharedMemory::EnsureRangeAllocated(uint32_t start, uint32_t length) {
     // Exceeds the physical address space.
     return false;
   }
+
+  // Ensure all tile heaps are present.
   uint32_t heap_first = start >> kHeapSizeLog2;
   uint32_t heap_last = (start + length - 1) >> kHeapSizeLog2;
   for (uint32_t i = heap_first; i <= heap_last; ++i) {
     if (heaps_[i] != nullptr) {
       continue;
     }
-    // TODO(Triang3l): If heap creation has failed at least once in this frame,
-    // don't try to allocate heaps until the next frame.
+    if (heap_creation_failed_) {
+      // Don't try to create a heap for every vertex buffer or texture in the
+      // current frame anymore if have failed at least once.
+      return false;
+    }
     auto provider = context_->GetD3D12Provider();
     auto device = provider->GetDevice();
     auto direct_queue = provider->GetDirectQueue();
@@ -98,6 +134,7 @@ bool SharedMemory::EnsureRangeAllocated(uint32_t start, uint32_t length) {
     heap_desc.SizeInBytes = kHeapSize;
     heap_desc.Properties.Type = D3D12_HEAP_TYPE_DEFAULT;
     if (FAILED(device->CreateHeap(&heap_desc, IID_PPV_ARGS(&heaps_[i])))) {
+      heap_creation_failed_ = true;
       return false;
     }
     D3D12_TILED_RESOURCE_COORDINATE region_start_coordinates;
@@ -111,11 +148,16 @@ bool SharedMemory::EnsureRangeAllocated(uint32_t start, uint32_t length) {
     D3D12_TILE_RANGE_FLAGS range_flags = D3D12_TILE_RANGE_FLAG_NONE;
     UINT heap_range_start_offset = 0;
     UINT range_tile_count = kHeapSize >> kTileSizeLog2;
+    // FIXME(Triang3l): This may cause issues if the emulator is shut down
+    // mid-frame and the heaps are destroyed before tile mappings are updated
+    // (AwaitAllFramesCompletion won't catch this then). Defer this until the
+    // actual command list submission at the end of the frame.
     direct_queue->UpdateTileMappings(
         buffer_, 1, &region_start_coordinates, &region_size, heaps_[i], 1,
         &range_flags, &heap_range_start_offset, &range_tile_count,
         D3D12_TILE_MAPPING_FLAG_NONE);
   }
+  // TODO(Triang3l): Mark the range for upload.
   return true;
 }
 
diff --git a/src/xenia/gpu/d3d12/shared_memory.h b/src/xenia/gpu/d3d12/shared_memory.h
index 5ae300db5..89a42185b 100644
--- a/src/xenia/gpu/d3d12/shared_memory.h
+++ b/src/xenia/gpu/d3d12/shared_memory.h
@@ -10,6 +10,9 @@
 #ifndef XENIA_GPU_D3D12_SHARED_MEMORY_H_
 #define XENIA_GPU_D3D12_SHARED_MEMORY_H_
 
+#include <mutex>
+
+#include "xenia/memory.h"
 #include "xenia/ui/d3d12/d3d12_api.h"
 #include "xenia/ui/d3d12/d3d12_context.h"
 
@@ -19,22 +22,28 @@ namespace d3d12 {
 
 // Manages memory for unconverted textures, resolve targets, vertex and index
 // buffers that can be accessed from shaders with Xenon physical addresses, with
-// 4 KB granularity.
+// system page size granularity.
 class SharedMemory {
  public:
-  SharedMemory(ui::d3d12::D3D12Context* context);
+  SharedMemory(Memory* memory, ui::d3d12::D3D12Context* context);
   ~SharedMemory();
 
   bool Initialize();
   void Shutdown();
 
-  // Ensures the backing memory for the address range is present in the tiled
-  // buffer, allocating if needed. If couldn't allocate, false is returned -
-  // it's unsafe to use this portion (on tiled resources tier 1 at least).
-  bool EnsureRangeAllocated(uint32_t start, uint32_t length);
+  void BeginFrame();
+
+  // Marks the range as used in this frame, queues it for upload if it was
+  // modified. Ensures the backing memory for the address range is present in
+  // the tiled buffer, allocating if needed. If couldn't allocate, false is
+  // returned - it's unsafe to use this portion (on tiled resources tier 1 at
+  // least).
+  bool UseRange(uint32_t start, uint32_t length);
 
  private:
-  ui::d3d12::D3D12Context* context_ = nullptr;
+  Memory* memory_;
+
+  ui::d3d12::D3D12Context* context_;
 
   // The 512 MB tiled buffer.
   static constexpr uint32_t kBufferSizeLog2 = 29;
@@ -51,11 +60,29 @@ class SharedMemory {
   static constexpr uint32_t kHeapSize = 1 << kHeapSizeLog2;
   // Resident portions of the tiled buffer.
   ID3D12Heap* heaps_[kBufferSize >> kHeapSizeLog2] = {};
+  // Whether creation of a heap has failed in the current frame.
+  bool heap_creation_failed_ = false;
 
   // Log2 of system page size.
   uint32_t page_size_log2_;
+  // Total physical page count.
+  uint32_t page_count_;
+
   // Bit vector containing whether physical memory system pages are up to date.
   std::vector<uint64_t> pages_in_sync_;
+
+  // Watched page management - must be synchronized.
+  std::mutex watch_mutex_;
+  // Whether each physical page is watched by the GPU (after uploading).
+  // Once a watch is triggered, it's not watched anymore.
+  std::vector<uint64_t> watched_pages_;
+  // Whether each page was modified while the current frame is being processed.
+  // This is checked and cleared in the beginning of a GPU frame.
+  // Because this is done with a locked CPU-GPU mutex, it's stored in 2 levels,
+  // so unmodified pages can be skipped quickly, and clearing is also fast.
+  // On L1, each bit corresponds to a single page, on L2, to 64 pages.
+  std::vector<uint64_t> watches_triggered_l1_;
+  std::vector<uint64_t> watches_triggered_l2_;
 };
 
 }  // namespace d3d12