/**
 ******************************************************************************
 * Xenia : Xbox 360 Emulator Research Project                                 *
 ******************************************************************************
 * Copyright 2022 Ben Vanik. All rights reserved.                             *
 * Released under the BSD license - see LICENSE in the root for more details. *
 ******************************************************************************
 */

#include <algorithm>
#include <cstring>
#include <memory>
#include <sstream>
#include <utility>

#include "xenia/base/assert.h"
#include "xenia/base/byte_order.h"
#include "xenia/base/cvar.h"
#include "xenia/base/logging.h"
#include "xenia/base/math.h"
#include "xenia/base/profiling.h"
#include "xenia/gpu/d3d12/d3d12_command_processor.h"
#include "xenia/gpu/d3d12/d3d12_graphics_system.h"
#include "xenia/gpu/d3d12/d3d12_shader.h"
#include "xenia/gpu/draw_util.h"
#include "xenia/gpu/gpu_flags.h"
#include "xenia/gpu/registers.h"
#include "xenia/gpu/xenos.h"
#include "xenia/ui/d3d12/d3d12_presenter.h"
#include "xenia/ui/d3d12/d3d12_util.h"

DEFINE_bool(d3d12_bindless, true,
            "Use bindless resources where available - may improve performance, "
            "but may make debugging more complicated.",
            "D3D12");
DEFINE_bool(d3d12_readback_memexport, false,
            "Read data written by memory export in shaders on the CPU. This "
            "may be needed in some games (but many only access exported data "
            "on the GPU, and this flag isn't needed to handle such behavior), "
            "but causes mid-frame synchronization, so it has a huge "
            "performance impact.",
            "D3D12");
DEFINE_bool(d3d12_readback_resolve, false,
            "Read render-to-texture results on the CPU. This may be needed in "
            "some games, for instance, for screenshots in saved games, but "
            "causes mid-frame synchronization, so it has a huge performance "
            "impact.",
            "D3D12");
DEFINE_bool(d3d12_submit_on_primary_buffer_end, true,
            "Submit the command list when a PM4 primary buffer ends if it's "
            "possible to submit immediately to try to reduce frame latency.",
            "D3D12");

namespace xe {
namespace gpu {
namespace d3d12 {

// Generated with `xb buildshaders`.
namespace shaders {
#include "xenia/gpu/shaders/bytecode/d3d12_5_1/apply_gamma_pwl_cs.h"
#include "xenia/gpu/shaders/bytecode/d3d12_5_1/apply_gamma_pwl_fxaa_luma_cs.h"
#include "xenia/gpu/shaders/bytecode/d3d12_5_1/apply_gamma_table_cs.h"
#include "xenia/gpu/shaders/bytecode/d3d12_5_1/apply_gamma_table_fxaa_luma_cs.h"
#include "xenia/gpu/shaders/bytecode/d3d12_5_1/fxaa_cs.h"
#include "xenia/gpu/shaders/bytecode/d3d12_5_1/fxaa_extreme_cs.h"
}  // namespace shaders

D3D12CommandProcessor::D3D12CommandProcessor(
    D3D12GraphicsSystem* graphics_system, kernel::KernelState* kernel_state)
    : CommandProcessor(graphics_system, kernel_state),
      deferred_command_list_(*this) {}

D3D12CommandProcessor::~D3D12CommandProcessor() = default;

void D3D12CommandProcessor::ClearCaches() {
  CommandProcessor::ClearCaches();
  cache_clear_requested_ = true;
}

void D3D12CommandProcessor::InitializeShaderStorage(
    const std::filesystem::path& cache_root, uint32_t title_id,
    bool blocking) {
  CommandProcessor::InitializeShaderStorage(cache_root, title_id, blocking);
  pipeline_cache_->InitializeShaderStorage(cache_root, title_id, blocking);
}

void D3D12CommandProcessor::RequestFrameTrace(
    const std::filesystem::path& root_path) {
  // Capture with PIX if attached.
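  // GetGraphicsAnalysis() exposes the graphics analysis (PIX) interface only
  // when the process was launched from PIX, so this effectively checks
  // whether PIX is attached; the request is only latched here and is consumed
  // later, when the next submission is opened.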
if (GetD3D12Provider().GetGraphicsAnalysis() != nullptr) { pix_capture_requested_.store(true, std::memory_order_relaxed); return; } CommandProcessor::RequestFrameTrace(root_path); } void D3D12CommandProcessor::TracePlaybackWroteMemory(uint32_t base_ptr, uint32_t length) { shared_memory_->MemoryInvalidationCallback(base_ptr, length, true); primitive_processor_->MemoryInvalidationCallback(base_ptr, length, true); } void D3D12CommandProcessor::RestoreEdramSnapshot(const void* snapshot) { // Starting a new frame because descriptors may be needed. if (!BeginSubmission(true)) { return; } render_target_cache_->RestoreEdramSnapshot(snapshot); } bool D3D12CommandProcessor::PushTransitionBarrier( ID3D12Resource* resource, D3D12_RESOURCE_STATES old_state, D3D12_RESOURCE_STATES new_state, UINT subresource) { if (old_state == new_state) { return false; } D3D12_RESOURCE_BARRIER barrier; barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE; barrier.Transition.pResource = resource; barrier.Transition.Subresource = subresource; barrier.Transition.StateBefore = old_state; barrier.Transition.StateAfter = new_state; barriers_.push_back(barrier); return true; } void D3D12CommandProcessor::PushAliasingBarrier(ID3D12Resource* old_resource, ID3D12Resource* new_resource) { D3D12_RESOURCE_BARRIER barrier; barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_ALIASING; barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE; barrier.Aliasing.pResourceBefore = old_resource; barrier.Aliasing.pResourceAfter = new_resource; barriers_.push_back(barrier); } void D3D12CommandProcessor::PushUAVBarrier(ID3D12Resource* resource) { D3D12_RESOURCE_BARRIER barrier; barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_UAV; barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE; barrier.UAV.pResource = resource; barriers_.push_back(barrier); } void D3D12CommandProcessor::SubmitBarriers() { UINT barrier_count = UINT(barriers_.size()); if (barrier_count != 0) { deferred_command_list_.D3DResourceBarrier(barrier_count, barriers_.data()); barriers_.clear(); } } ID3D12RootSignature* D3D12CommandProcessor::GetRootSignature( const DxbcShader* vertex_shader, const DxbcShader* pixel_shader, bool tessellated) { if (bindless_resources_used_) { return tessellated ? root_signature_bindless_ds_ : root_signature_bindless_vs_; } D3D12_SHADER_VISIBILITY vertex_visibility = tessellated ? D3D12_SHADER_VISIBILITY_DOMAIN : D3D12_SHADER_VISIBILITY_VERTEX; uint32_t texture_count_vertex = uint32_t(vertex_shader->GetTextureBindingsAfterTranslation().size()); uint32_t sampler_count_vertex = uint32_t(vertex_shader->GetSamplerBindingsAfterTranslation().size()); uint32_t texture_count_pixel = pixel_shader ? uint32_t(pixel_shader->GetTextureBindingsAfterTranslation().size()) : 0; uint32_t sampler_count_pixel = pixel_shader ? uint32_t(pixel_shader->GetSamplerBindingsAfterTranslation().size()) : 0; // Better put the pixel texture/sampler in the lower bits probably because it // changes often. 
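  // The resulting cache key packs the binding counts and the tessellation
  // flag into a single uint32_t, from the lowest bits upwards:
  //   [pixel textures][pixel samplers][vertex textures][vertex samplers][DS]
  // As an illustration, with 4-bit count fields, 2 pixel textures and 1 pixel
  // sampler would give 0x12 in the low byte; the real field widths are
  // kMaxTextureBindingIndexBits and kMaxSamplerBindingIndexBits.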
uint32_t index = 0; uint32_t index_offset = 0; index |= texture_count_pixel << index_offset; index_offset += D3D12Shader::kMaxTextureBindingIndexBits; index |= sampler_count_pixel << index_offset; index_offset += D3D12Shader::kMaxSamplerBindingIndexBits; index |= texture_count_vertex << index_offset; index_offset += D3D12Shader::kMaxTextureBindingIndexBits; index |= sampler_count_vertex << index_offset; index_offset += D3D12Shader::kMaxSamplerBindingIndexBits; index |= uint32_t(vertex_visibility == D3D12_SHADER_VISIBILITY_DOMAIN) << index_offset; ++index_offset; assert_true(index_offset <= 32); // Try an existing root signature. auto it = root_signatures_bindful_.find(index); if (it != root_signatures_bindful_.end()) { return it->second; } // Create a new one. D3D12_ROOT_SIGNATURE_DESC desc; D3D12_ROOT_PARAMETER parameters[kRootParameter_Bindful_Count_Max]; desc.NumParameters = kRootParameter_Bindful_Count_Base; desc.pParameters = parameters; desc.NumStaticSamplers = 0; desc.pStaticSamplers = nullptr; desc.Flags = D3D12_ROOT_SIGNATURE_FLAG_NONE; // Base parameters. // Fetch constants. { auto& parameter = parameters[kRootParameter_Bindful_FetchConstants]; parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_CBV; parameter.Descriptor.ShaderRegister = uint32_t(DxbcShaderTranslator::CbufferRegister::kFetchConstants); parameter.Descriptor.RegisterSpace = 0; parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; } // Vertex float constants. { auto& parameter = parameters[kRootParameter_Bindful_FloatConstantsVertex]; parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_CBV; parameter.Descriptor.ShaderRegister = uint32_t(DxbcShaderTranslator::CbufferRegister::kFloatConstants); parameter.Descriptor.RegisterSpace = 0; parameter.ShaderVisibility = vertex_visibility; } // Pixel float constants. { auto& parameter = parameters[kRootParameter_Bindful_FloatConstantsPixel]; parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_CBV; parameter.Descriptor.ShaderRegister = uint32_t(DxbcShaderTranslator::CbufferRegister::kFloatConstants); parameter.Descriptor.RegisterSpace = 0; parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL; } // System constants. { auto& parameter = parameters[kRootParameter_Bindful_SystemConstants]; parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_CBV; parameter.Descriptor.ShaderRegister = uint32_t(DxbcShaderTranslator::CbufferRegister::kSystemConstants); parameter.Descriptor.RegisterSpace = 0; parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; } // Bool and loop constants. { auto& parameter = parameters[kRootParameter_Bindful_BoolLoopConstants]; parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_CBV; parameter.Descriptor.ShaderRegister = uint32_t(DxbcShaderTranslator::CbufferRegister::kBoolLoopConstants); parameter.Descriptor.RegisterSpace = 0; parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; } // Shared memory and, if ROVs are used, EDRAM. 
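  // One descriptor table with consecutive slots: the shared memory SRV at
  // table offset 0, the shared memory UAV at offset 1, and, only on the pixel
  // shader interlock (ROV) path, the EDRAM UAV at offset 2 - the
  // OffsetInDescriptorsFromTableStart values below must stay contiguous.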
D3D12_DESCRIPTOR_RANGE shared_memory_and_edram_ranges[3]; { auto& parameter = parameters[kRootParameter_Bindful_SharedMemoryAndEdram]; parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; parameter.DescriptorTable.NumDescriptorRanges = 2; parameter.DescriptorTable.pDescriptorRanges = shared_memory_and_edram_ranges; parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; shared_memory_and_edram_ranges[0].RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV; shared_memory_and_edram_ranges[0].NumDescriptors = 1; shared_memory_and_edram_ranges[0].BaseShaderRegister = uint32_t(DxbcShaderTranslator::SRVMainRegister::kSharedMemory); shared_memory_and_edram_ranges[0].RegisterSpace = uint32_t(DxbcShaderTranslator::SRVSpace::kMain); shared_memory_and_edram_ranges[0].OffsetInDescriptorsFromTableStart = 0; shared_memory_and_edram_ranges[1].RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_UAV; shared_memory_and_edram_ranges[1].NumDescriptors = 1; shared_memory_and_edram_ranges[1].BaseShaderRegister = UINT(DxbcShaderTranslator::UAVRegister::kSharedMemory); shared_memory_and_edram_ranges[1].RegisterSpace = 0; shared_memory_and_edram_ranges[1].OffsetInDescriptorsFromTableStart = 1; if (render_target_cache_->GetPath() == RenderTargetCache::Path::kPixelShaderInterlock) { ++parameter.DescriptorTable.NumDescriptorRanges; shared_memory_and_edram_ranges[2].RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_UAV; shared_memory_and_edram_ranges[2].NumDescriptors = 1; shared_memory_and_edram_ranges[2].BaseShaderRegister = UINT(DxbcShaderTranslator::UAVRegister::kEdram); shared_memory_and_edram_ranges[2].RegisterSpace = 0; shared_memory_and_edram_ranges[2].OffsetInDescriptorsFromTableStart = 2; } } // Extra parameters. // Pixel textures. D3D12_DESCRIPTOR_RANGE range_textures_pixel; if (texture_count_pixel > 0) { auto& parameter = parameters[desc.NumParameters]; parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; parameter.DescriptorTable.NumDescriptorRanges = 1; parameter.DescriptorTable.pDescriptorRanges = &range_textures_pixel; parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL; range_textures_pixel.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV; range_textures_pixel.NumDescriptors = texture_count_pixel; range_textures_pixel.BaseShaderRegister = uint32_t(DxbcShaderTranslator::SRVMainRegister::kBindfulTexturesStart); range_textures_pixel.RegisterSpace = uint32_t(DxbcShaderTranslator::SRVSpace::kMain); range_textures_pixel.OffsetInDescriptorsFromTableStart = 0; ++desc.NumParameters; } // Pixel samplers. D3D12_DESCRIPTOR_RANGE range_samplers_pixel; if (sampler_count_pixel > 0) { auto& parameter = parameters[desc.NumParameters]; parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; parameter.DescriptorTable.NumDescriptorRanges = 1; parameter.DescriptorTable.pDescriptorRanges = &range_samplers_pixel; parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL; range_samplers_pixel.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER; range_samplers_pixel.NumDescriptors = sampler_count_pixel; range_samplers_pixel.BaseShaderRegister = 0; range_samplers_pixel.RegisterSpace = 0; range_samplers_pixel.OffsetInDescriptorsFromTableStart = 0; ++desc.NumParameters; } // Vertex textures. 
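  // These tables use vertex_visibility rather than a fixed stage: when the
  // draw is tessellated, the guest vertex shader is translated into a domain
  // shader, so the same parameters must be visible to the domain stage
  // instead.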
D3D12_DESCRIPTOR_RANGE range_textures_vertex; if (texture_count_vertex > 0) { auto& parameter = parameters[desc.NumParameters]; parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; parameter.DescriptorTable.NumDescriptorRanges = 1; parameter.DescriptorTable.pDescriptorRanges = &range_textures_vertex; parameter.ShaderVisibility = vertex_visibility; range_textures_vertex.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV; range_textures_vertex.NumDescriptors = texture_count_vertex; range_textures_vertex.BaseShaderRegister = uint32_t(DxbcShaderTranslator::SRVMainRegister::kBindfulTexturesStart); range_textures_vertex.RegisterSpace = uint32_t(DxbcShaderTranslator::SRVSpace::kMain); range_textures_vertex.OffsetInDescriptorsFromTableStart = 0; ++desc.NumParameters; } // Vertex samplers. D3D12_DESCRIPTOR_RANGE range_samplers_vertex; if (sampler_count_vertex > 0) { auto& parameter = parameters[desc.NumParameters]; parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; parameter.DescriptorTable.NumDescriptorRanges = 1; parameter.DescriptorTable.pDescriptorRanges = &range_samplers_vertex; parameter.ShaderVisibility = vertex_visibility; range_samplers_vertex.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER; range_samplers_vertex.NumDescriptors = sampler_count_vertex; range_samplers_vertex.BaseShaderRegister = 0; range_samplers_vertex.RegisterSpace = 0; range_samplers_vertex.OffsetInDescriptorsFromTableStart = 0; ++desc.NumParameters; } ID3D12RootSignature* root_signature = ui::d3d12::util::CreateRootSignature(GetD3D12Provider(), desc); if (root_signature == nullptr) { XELOGE( "Failed to create a root signature with {} pixel textures, {} pixel " "samplers, {} vertex textures and {} vertex samplers", texture_count_pixel, sampler_count_pixel, texture_count_vertex, sampler_count_vertex); return nullptr; } root_signatures_bindful_.emplace(index, root_signature); return root_signature; } uint32_t D3D12CommandProcessor::GetRootBindfulExtraParameterIndices( const DxbcShader* vertex_shader, const DxbcShader* pixel_shader, RootBindfulExtraParameterIndices& indices_out) { uint32_t index = kRootParameter_Bindful_Count_Base; if (pixel_shader && !pixel_shader->GetTextureBindingsAfterTranslation().empty()) { indices_out.textures_pixel = index++; } else { indices_out.textures_pixel = RootBindfulExtraParameterIndices::kUnavailable; } if (pixel_shader && !pixel_shader->GetSamplerBindingsAfterTranslation().empty()) { indices_out.samplers_pixel = index++; } else { indices_out.samplers_pixel = RootBindfulExtraParameterIndices::kUnavailable; } if (!vertex_shader->GetTextureBindingsAfterTranslation().empty()) { indices_out.textures_vertex = index++; } else { indices_out.textures_vertex = RootBindfulExtraParameterIndices::kUnavailable; } if (!vertex_shader->GetSamplerBindingsAfterTranslation().empty()) { indices_out.samplers_vertex = index++; } else { indices_out.samplers_vertex = RootBindfulExtraParameterIndices::kUnavailable; } return index; } uint64_t D3D12CommandProcessor::RequestViewBindfulDescriptors( uint64_t previous_heap_index, uint32_t count_for_partial_update, uint32_t count_for_full_update, D3D12_CPU_DESCRIPTOR_HANDLE& cpu_handle_out, D3D12_GPU_DESCRIPTOR_HANDLE& gpu_handle_out) { assert_false(bindless_resources_used_); assert_true(submission_open_); uint32_t descriptor_index; uint64_t current_heap_index = view_bindful_heap_pool_->Request( frame_current_, previous_heap_index, count_for_partial_update, count_for_full_update, descriptor_index); if (current_heap_index == 
ui::d3d12::D3D12DescriptorHeapPool::kHeapIndexInvalid) { // There was an error. return ui::d3d12::D3D12DescriptorHeapPool::kHeapIndexInvalid; } ID3D12DescriptorHeap* heap = view_bindful_heap_pool_->GetLastRequestHeap(); if (view_bindful_heap_current_ != heap) { view_bindful_heap_current_ = heap; deferred_command_list_.SetDescriptorHeaps(view_bindful_heap_current_, sampler_bindful_heap_current_); } const ui::d3d12::D3D12Provider& provider = GetD3D12Provider(); cpu_handle_out = provider.OffsetViewDescriptor( view_bindful_heap_pool_->GetLastRequestHeapCPUStart(), descriptor_index); gpu_handle_out = provider.OffsetViewDescriptor( view_bindful_heap_pool_->GetLastRequestHeapGPUStart(), descriptor_index); return current_heap_index; } uint32_t D3D12CommandProcessor::RequestPersistentViewBindlessDescriptor() { assert_true(bindless_resources_used_); if (!view_bindless_heap_free_.empty()) { uint32_t descriptor_index = view_bindless_heap_free_.back(); view_bindless_heap_free_.pop_back(); return descriptor_index; } if (view_bindless_heap_allocated_ >= kViewBindlessHeapSize) { return UINT32_MAX; } return view_bindless_heap_allocated_++; } void D3D12CommandProcessor::ReleaseViewBindlessDescriptorImmediately( uint32_t descriptor_index) { assert_true(bindless_resources_used_); view_bindless_heap_free_.push_back(descriptor_index); } bool D3D12CommandProcessor::RequestOneUseSingleViewDescriptors( uint32_t count, ui::d3d12::util::DescriptorCpuGpuHandlePair* handles_out) { assert_true(submission_open_); if (!count) { return true; } assert_not_null(handles_out); const ui::d3d12::D3D12Provider& provider = GetD3D12Provider(); if (bindless_resources_used_) { // Request separate bindless descriptors that will be freed when this // submission is completed by the GPU. if (count > kViewBindlessHeapSize - view_bindless_heap_allocated_ + view_bindless_heap_free_.size()) { return false; } for (uint32_t i = 0; i < count; ++i) { uint32_t descriptor_index; if (!view_bindless_heap_free_.empty()) { descriptor_index = view_bindless_heap_free_.back(); view_bindless_heap_free_.pop_back(); } else { descriptor_index = view_bindless_heap_allocated_++; } view_bindless_one_use_descriptors_.push_back( std::make_pair(descriptor_index, submission_current_)); handles_out[i] = std::make_pair(provider.OffsetViewDescriptor( view_bindless_heap_cpu_start_, descriptor_index), provider.OffsetViewDescriptor( view_bindless_heap_gpu_start_, descriptor_index)); } } else { // Request a range within the current heap for bindful resources path. 
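    // Passing count as both the partial and the full update size means the
    // request is never split: either count contiguous descriptors fit in the
    // current heap, or the pool switches to a fresh heap.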
D3D12_CPU_DESCRIPTOR_HANDLE cpu_handle_start; D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle_start; if (RequestViewBindfulDescriptors( ui::d3d12::D3D12DescriptorHeapPool::kHeapIndexInvalid, count, count, cpu_handle_start, gpu_handle_start) == ui::d3d12::D3D12DescriptorHeapPool::kHeapIndexInvalid) { return false; } for (uint32_t i = 0; i < count; ++i) { handles_out[i] = std::make_pair(provider.OffsetViewDescriptor(cpu_handle_start, i), provider.OffsetViewDescriptor(gpu_handle_start, i)); } } return true; } ui::d3d12::util::DescriptorCpuGpuHandlePair D3D12CommandProcessor::GetSystemBindlessViewHandlePair( SystemBindlessView view) const { assert_true(bindless_resources_used_); const ui::d3d12::D3D12Provider& provider = GetD3D12Provider(); return std::make_pair(provider.OffsetViewDescriptor( view_bindless_heap_cpu_start_, uint32_t(view)), provider.OffsetViewDescriptor( view_bindless_heap_gpu_start_, uint32_t(view))); } ui::d3d12::util::DescriptorCpuGpuHandlePair D3D12CommandProcessor::GetSharedMemoryUintPow2BindlessSRVHandlePair( uint32_t element_size_bytes_pow2) const { SystemBindlessView view; switch (element_size_bytes_pow2) { case 2: view = SystemBindlessView::kSharedMemoryR32UintSRV; break; case 3: view = SystemBindlessView::kSharedMemoryR32G32UintSRV; break; case 4: view = SystemBindlessView::kSharedMemoryR32G32B32A32UintSRV; break; default: assert_unhandled_case(element_size_bytes_pow2); view = SystemBindlessView::kSharedMemoryR32UintSRV; } return GetSystemBindlessViewHandlePair(view); } ui::d3d12::util::DescriptorCpuGpuHandlePair D3D12CommandProcessor::GetSharedMemoryUintPow2BindlessUAVHandlePair( uint32_t element_size_bytes_pow2) const { SystemBindlessView view; switch (element_size_bytes_pow2) { case 2: view = SystemBindlessView::kSharedMemoryR32UintUAV; break; case 3: view = SystemBindlessView::kSharedMemoryR32G32UintUAV; break; case 4: view = SystemBindlessView::kSharedMemoryR32G32B32A32UintUAV; break; default: assert_unhandled_case(element_size_bytes_pow2); view = SystemBindlessView::kSharedMemoryR32UintUAV; } return GetSystemBindlessViewHandlePair(view); } ui::d3d12::util::DescriptorCpuGpuHandlePair D3D12CommandProcessor::GetEdramUintPow2BindlessSRVHandlePair( uint32_t element_size_bytes_pow2) const { SystemBindlessView view; switch (element_size_bytes_pow2) { case 2: view = SystemBindlessView::kEdramR32UintSRV; break; case 3: view = SystemBindlessView::kEdramR32G32UintSRV; break; case 4: view = SystemBindlessView::kEdramR32G32B32A32UintSRV; break; default: assert_unhandled_case(element_size_bytes_pow2); view = SystemBindlessView::kEdramR32UintSRV; } return GetSystemBindlessViewHandlePair(view); } ui::d3d12::util::DescriptorCpuGpuHandlePair D3D12CommandProcessor::GetEdramUintPow2BindlessUAVHandlePair( uint32_t element_size_bytes_pow2) const { SystemBindlessView view; switch (element_size_bytes_pow2) { case 2: view = SystemBindlessView::kEdramR32UintUAV; break; case 3: view = SystemBindlessView::kEdramR32G32UintUAV; break; case 4: view = SystemBindlessView::kEdramR32G32B32A32UintUAV; break; default: assert_unhandled_case(element_size_bytes_pow2); view = SystemBindlessView::kEdramR32UintUAV; } return GetSystemBindlessViewHandlePair(view); } uint64_t D3D12CommandProcessor::RequestSamplerBindfulDescriptors( uint64_t previous_heap_index, uint32_t count_for_partial_update, uint32_t count_for_full_update, D3D12_CPU_DESCRIPTOR_HANDLE& cpu_handle_out, D3D12_GPU_DESCRIPTOR_HANDLE& gpu_handle_out) { assert_false(bindless_resources_used_); assert_true(submission_open_); uint32_t descriptor_index; 
uint64_t current_heap_index = sampler_bindful_heap_pool_->Request( frame_current_, previous_heap_index, count_for_partial_update, count_for_full_update, descriptor_index); if (current_heap_index == ui::d3d12::D3D12DescriptorHeapPool::kHeapIndexInvalid) { // There was an error. return ui::d3d12::D3D12DescriptorHeapPool::kHeapIndexInvalid; } ID3D12DescriptorHeap* heap = sampler_bindful_heap_pool_->GetLastRequestHeap(); if (sampler_bindful_heap_current_ != heap) { sampler_bindful_heap_current_ = heap; deferred_command_list_.SetDescriptorHeaps(view_bindful_heap_current_, sampler_bindful_heap_current_); } const ui::d3d12::D3D12Provider& provider = GetD3D12Provider(); cpu_handle_out = provider.OffsetSamplerDescriptor( sampler_bindful_heap_pool_->GetLastRequestHeapCPUStart(), descriptor_index); gpu_handle_out = provider.OffsetSamplerDescriptor( sampler_bindful_heap_pool_->GetLastRequestHeapGPUStart(), descriptor_index); return current_heap_index; } ID3D12Resource* D3D12CommandProcessor::RequestScratchGPUBuffer( uint32_t size, D3D12_RESOURCE_STATES state) { assert_true(submission_open_); assert_false(scratch_buffer_used_); if (!submission_open_ || scratch_buffer_used_ || size == 0) { return nullptr; } if (size <= scratch_buffer_size_) { PushTransitionBarrier(scratch_buffer_, scratch_buffer_state_, state); scratch_buffer_state_ = state; scratch_buffer_used_ = true; return scratch_buffer_; } size = xe::align(size, kScratchBufferSizeIncrement); const ui::d3d12::D3D12Provider& provider = GetD3D12Provider(); ID3D12Device* device = provider.GetDevice(); D3D12_RESOURCE_DESC buffer_desc; ui::d3d12::util::FillBufferResourceDesc( buffer_desc, size, D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS); ID3D12Resource* buffer; if (FAILED(device->CreateCommittedResource( &ui::d3d12::util::kHeapPropertiesDefault, provider.GetHeapFlagCreateNotZeroed(), &buffer_desc, state, nullptr, IID_PPV_ARGS(&buffer)))) { XELOGE("Failed to create a {} MB scratch GPU buffer", size >> 20); return nullptr; } if (scratch_buffer_ != nullptr) { resources_for_deletion_.emplace_back(submission_current_, scratch_buffer_); } scratch_buffer_ = buffer; scratch_buffer_size_ = size; scratch_buffer_state_ = state; scratch_buffer_used_ = true; return scratch_buffer_; } void D3D12CommandProcessor::ReleaseScratchGPUBuffer( ID3D12Resource* buffer, D3D12_RESOURCE_STATES new_state) { assert_true(submission_open_); assert_true(scratch_buffer_used_); scratch_buffer_used_ = false; if (buffer == scratch_buffer_) { scratch_buffer_state_ = new_state; } } void D3D12CommandProcessor::SetExternalPipeline(ID3D12PipelineState* pipeline) { if (current_external_pipeline_ != pipeline) { current_external_pipeline_ = pipeline; current_guest_pipeline_ = nullptr; deferred_command_list_.D3DSetPipelineState(pipeline); } } void D3D12CommandProcessor::SetExternalGraphicsRootSignature( ID3D12RootSignature* root_signature) { if (current_graphics_root_signature_ != root_signature) { current_graphics_root_signature_ = root_signature; deferred_command_list_.D3DSetGraphicsRootSignature(root_signature); } // Force-invalidate because setting a non-guest root signature. 
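  // current_graphics_root_up_to_date_ is a bit mask with one bit per root
  // parameter; clearing it forces every guest root parameter to be rebound
  // during the next draw's binding update, since the external root
  // signature's parameter layout doesn't match the guest one.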
current_graphics_root_up_to_date_ = 0; } void D3D12CommandProcessor::SetViewport(const D3D12_VIEWPORT& viewport) { ff_viewport_update_needed_ |= ff_viewport_.TopLeftX != viewport.TopLeftX; ff_viewport_update_needed_ |= ff_viewport_.TopLeftY != viewport.TopLeftY; ff_viewport_update_needed_ |= ff_viewport_.Width != viewport.Width; ff_viewport_update_needed_ |= ff_viewport_.Height != viewport.Height; ff_viewport_update_needed_ |= ff_viewport_.MinDepth != viewport.MinDepth; ff_viewport_update_needed_ |= ff_viewport_.MaxDepth != viewport.MaxDepth; if (ff_viewport_update_needed_) { ff_viewport_ = viewport; deferred_command_list_.RSSetViewport(ff_viewport_); ff_viewport_update_needed_ = false; } } void D3D12CommandProcessor::SetScissorRect(const D3D12_RECT& scissor_rect) { ff_scissor_update_needed_ |= ff_scissor_.left != scissor_rect.left; ff_scissor_update_needed_ |= ff_scissor_.top != scissor_rect.top; ff_scissor_update_needed_ |= ff_scissor_.right != scissor_rect.right; ff_scissor_update_needed_ |= ff_scissor_.bottom != scissor_rect.bottom; if (ff_scissor_update_needed_) { ff_scissor_ = scissor_rect; deferred_command_list_.RSSetScissorRect(ff_scissor_); ff_scissor_update_needed_ = false; } } void D3D12CommandProcessor::SetStencilReference(uint32_t stencil_ref) { ff_stencil_ref_update_needed_ |= ff_stencil_ref_ != stencil_ref; if (ff_stencil_ref_update_needed_) { ff_stencil_ref_ = stencil_ref; deferred_command_list_.D3DOMSetStencilRef(stencil_ref); ff_stencil_ref_update_needed_ = false; } } void D3D12CommandProcessor::SetPrimitiveTopology( D3D12_PRIMITIVE_TOPOLOGY primitive_topology) { if (primitive_topology_ != primitive_topology) { primitive_topology_ = primitive_topology; deferred_command_list_.D3DIASetPrimitiveTopology(primitive_topology); } } std::string D3D12CommandProcessor::GetWindowTitleText() const { std::ostringstream title; title << "Direct3D 12"; if (render_target_cache_) { // Rasterizer-ordered views are a feature very rarely used as of 2020 and // that faces adoption complications (outside of Direct3D - on Vulkan - at // least), but crucial to Xenia - raise awareness of its usage. // https://github.com/KhronosGroup/Vulkan-Ecosystem/issues/27#issuecomment-455712319 // "In Xenia's title bar "D3D12 ROV" can be seen, which was a surprise, as I // wasn't aware that Xenia D3D12 backend was using Raster Order Views // feature" - oscarbg in that issue. switch (render_target_cache_->GetPath()) { case RenderTargetCache::Path::kHostRenderTargets: title << " - RTV/DSV"; break; case RenderTargetCache::Path::kPixelShaderInterlock: title << " - ROV"; break; default: break; } uint32_t draw_resolution_scale_x = texture_cache_ ? texture_cache_->draw_resolution_scale_x() : 1; uint32_t draw_resolution_scale_y = texture_cache_ ? 
        texture_cache_->draw_resolution_scale_y() : 1;
    if (draw_resolution_scale_x > 1 || draw_resolution_scale_y > 1) {
      title << ' ' << draw_resolution_scale_x << 'x'
            << draw_resolution_scale_y;
    }
  }
  return title.str();
}

bool D3D12CommandProcessor::SetupContext() {
  if (!CommandProcessor::SetupContext()) {
    XELOGE("Failed to initialize base command processor context");
    return false;
  }

  const ui::d3d12::D3D12Provider& provider = GetD3D12Provider();
  ID3D12Device* device = provider.GetDevice();
  ID3D12CommandQueue* direct_queue = provider.GetDirectQueue();

  fence_completion_event_ = CreateEvent(nullptr, FALSE, FALSE, nullptr);
  if (fence_completion_event_ == nullptr) {
    XELOGE("Failed to create the fence completion event");
    return false;
  }
  if (FAILED(device->CreateFence(0, D3D12_FENCE_FLAG_NONE,
                                 IID_PPV_ARGS(&submission_fence_)))) {
    XELOGE("Failed to create the submission fence");
    return false;
  }
  if (FAILED(device->CreateFence(
          0, D3D12_FENCE_FLAG_NONE,
          IID_PPV_ARGS(&queue_operations_since_submission_fence_)))) {
    XELOGE(
        "Failed to create the fence for awaiting queue operations done since "
        "the latest submission");
    return false;
  }

  // Create the command list and one allocator because it's needed for a
  // command list.
  ID3D12CommandAllocator* command_allocator;
  if (FAILED(device->CreateCommandAllocator(
          D3D12_COMMAND_LIST_TYPE_DIRECT,
          IID_PPV_ARGS(&command_allocator)))) {
    XELOGE("Failed to create a command allocator");
    return false;
  }
  command_allocator_writable_first_ = new CommandAllocator;
  command_allocator_writable_first_->command_allocator = command_allocator;
  command_allocator_writable_first_->last_usage_submission = 0;
  command_allocator_writable_first_->next = nullptr;
  command_allocator_writable_last_ = command_allocator_writable_first_;
  if (FAILED(device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT,
                                       command_allocator, nullptr,
                                       IID_PPV_ARGS(&command_list_)))) {
    XELOGE("Failed to create the graphics command list");
    return false;
  }
  // Initially in open state, wait until a deferred command list submission.
  command_list_->Close();
  // Optional - added in Creators Update (SDK 10.0.15063.0).
  command_list_->QueryInterface(IID_PPV_ARGS(&command_list_1_));

  bindless_resources_used_ =
      cvars::d3d12_bindless &&
      provider.GetResourceBindingTier() >= D3D12_RESOURCE_BINDING_TIER_2;

  // Get the draw resolution scale for the render target cache and the texture
  // cache.
  uint32_t draw_resolution_scale_x, draw_resolution_scale_y;
  bool draw_resolution_scale_not_clamped =
      TextureCache::GetConfigDrawResolutionScale(draw_resolution_scale_x,
                                                 draw_resolution_scale_y);
  if (!D3D12TextureCache::ClampDrawResolutionScaleToMaxSupported(
          draw_resolution_scale_x, draw_resolution_scale_y, provider)) {
    draw_resolution_scale_not_clamped = false;
  }
  if (!draw_resolution_scale_not_clamped) {
    XELOGW(
        "The requested draw resolution scale is not supported by the device or "
        "the emulator, reducing to {}x{}",
        draw_resolution_scale_x, draw_resolution_scale_y);
  }

  shared_memory_ =
      std::make_unique<D3D12SharedMemory>(*this, *memory_, trace_writer_);
  if (!shared_memory_->Initialize()) {
    XELOGE("Failed to initialize shared memory");
    return false;
  }

  // Initialize the render target cache before configuring binding - need to
  // know if using rasterizer-ordered views for the bindless root signature.
  render_target_cache_ = std::make_unique<D3D12RenderTargetCache>(
      *register_file_, *memory_, trace_writer_, draw_resolution_scale_x,
      draw_resolution_scale_y, *this, bindless_resources_used_);
  if (!render_target_cache_->Initialize()) {
    XELOGE("Failed to initialize the render target cache");
    return false;
  }

  // Initialize resource binding.
  constant_buffer_pool_ = std::make_unique<ui::d3d12::D3D12UploadBufferPool>(
      provider,
      std::max(ui::d3d12::D3D12UploadBufferPool::kDefaultPageSize,
               sizeof(float) * 4 * D3D12_REQ_CONSTANT_BUFFER_ELEMENT_COUNT));
  if (bindless_resources_used_) {
    D3D12_DESCRIPTOR_HEAP_DESC view_bindless_heap_desc;
    view_bindless_heap_desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV;
    view_bindless_heap_desc.NumDescriptors = kViewBindlessHeapSize;
    view_bindless_heap_desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE;
    view_bindless_heap_desc.NodeMask = 0;
    if (FAILED(device->CreateDescriptorHeap(
            &view_bindless_heap_desc, IID_PPV_ARGS(&view_bindless_heap_)))) {
      XELOGE("Failed to create the bindless CBV/SRV/UAV descriptor heap");
      return false;
    }
    view_bindless_heap_cpu_start_ =
        view_bindless_heap_->GetCPUDescriptorHandleForHeapStart();
    view_bindless_heap_gpu_start_ =
        view_bindless_heap_->GetGPUDescriptorHandleForHeapStart();
    view_bindless_heap_allocated_ = uint32_t(SystemBindlessView::kCount);

    D3D12_DESCRIPTOR_HEAP_DESC sampler_bindless_heap_desc;
    sampler_bindless_heap_desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER;
    sampler_bindless_heap_desc.NumDescriptors = kSamplerHeapSize;
    sampler_bindless_heap_desc.Flags =
        D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE;
    sampler_bindless_heap_desc.NodeMask = 0;
    if (FAILED(device->CreateDescriptorHeap(
            &sampler_bindless_heap_desc,
            IID_PPV_ARGS(&sampler_bindless_heap_current_)))) {
      XELOGE("Failed to create the bindless sampler descriptor heap");
      return false;
    }
    sampler_bindless_heap_cpu_start_ =
        sampler_bindless_heap_current_->GetCPUDescriptorHandleForHeapStart();
    sampler_bindless_heap_gpu_start_ =
        sampler_bindless_heap_current_->GetGPUDescriptorHandleForHeapStart();
    sampler_bindless_heap_allocated_ = 0;
  } else {
    view_bindful_heap_pool_ =
        std::make_unique<ui::d3d12::D3D12DescriptorHeapPool>(
            device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV,
            kViewBindfulHeapSize);
    sampler_bindful_heap_pool_ =
        std::make_unique<ui::d3d12::D3D12DescriptorHeapPool>(
            device, D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER, kSamplerHeapSize);
  }

  if (bindless_resources_used_) {
    // Global bindless resource root signatures.
    // No CBV or UAV descriptor ranges with any descriptors to be allocated
    // dynamically (via RequestPersistentViewBindlessDescriptor or
    // RequestOneUseSingleViewDescriptors) should be here, because they would
    // overlap the unbounded SRV range, which is not allowed on Nvidia Fermi!
    D3D12_ROOT_SIGNATURE_DESC root_signature_bindless_desc;
    D3D12_ROOT_PARAMETER
        root_parameters_bindless[kRootParameter_Bindless_Count];
    root_signature_bindless_desc.NumParameters = kRootParameter_Bindless_Count;
    root_signature_bindless_desc.pParameters = root_parameters_bindless;
    root_signature_bindless_desc.NumStaticSamplers = 0;
    root_signature_bindless_desc.pStaticSamplers = nullptr;
    root_signature_bindless_desc.Flags = D3D12_ROOT_SIGNATURE_FLAG_NONE;
    // Fetch constants.
    {
      auto& parameter =
          root_parameters_bindless[kRootParameter_Bindless_FetchConstants];
      parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_CBV;
      parameter.Descriptor.ShaderRegister =
          uint32_t(DxbcShaderTranslator::CbufferRegister::kFetchConstants);
      parameter.Descriptor.RegisterSpace = 0;
      parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
    }
    // Vertex float constants.
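    // Vertex and pixel float constants are separate root CBVs because the
    // guest updates the two 256-register halves of the float constant file
    // independently, so separate bindings let only the dirty half be
    // re-uploaded.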
    {
      auto& parameter =
          root_parameters_bindless[kRootParameter_Bindless_FloatConstantsVertex];
      parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_CBV;
      parameter.Descriptor.ShaderRegister =
          uint32_t(DxbcShaderTranslator::CbufferRegister::kFloatConstants);
      parameter.Descriptor.RegisterSpace = 0;
      parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_VERTEX;
    }
    // Pixel float constants.
    {
      auto& parameter =
          root_parameters_bindless[kRootParameter_Bindless_FloatConstantsPixel];
      parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_CBV;
      parameter.Descriptor.ShaderRegister =
          uint32_t(DxbcShaderTranslator::CbufferRegister::kFloatConstants);
      parameter.Descriptor.RegisterSpace = 0;
      parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL;
    }
    // Pixel shader descriptor indices.
    {
      auto& parameter =
          root_parameters_bindless[kRootParameter_Bindless_DescriptorIndicesPixel];
      parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_CBV;
      parameter.Descriptor.ShaderRegister =
          uint32_t(DxbcShaderTranslator::CbufferRegister::kDescriptorIndices);
      parameter.Descriptor.RegisterSpace = 0;
      parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL;
    }
    // Vertex shader descriptor indices.
    {
      auto& parameter =
          root_parameters_bindless[kRootParameter_Bindless_DescriptorIndicesVertex];
      parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_CBV;
      parameter.Descriptor.ShaderRegister =
          uint32_t(DxbcShaderTranslator::CbufferRegister::kDescriptorIndices);
      parameter.Descriptor.RegisterSpace = 0;
      parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_VERTEX;
    }
    // System constants.
    {
      auto& parameter =
          root_parameters_bindless[kRootParameter_Bindless_SystemConstants];
      parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_CBV;
      parameter.Descriptor.ShaderRegister =
          uint32_t(DxbcShaderTranslator::CbufferRegister::kSystemConstants);
      parameter.Descriptor.RegisterSpace = 0;
      parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
    }
    // Bool and loop constants.
    {
      auto& parameter =
          root_parameters_bindless[kRootParameter_Bindless_BoolLoopConstants];
      parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_CBV;
      parameter.Descriptor.ShaderRegister =
          uint32_t(DxbcShaderTranslator::CbufferRegister::kBoolLoopConstants);
      parameter.Descriptor.RegisterSpace = 0;
      parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
    }
    // Sampler heap.
    D3D12_DESCRIPTOR_RANGE root_bindless_sampler_range;
    {
      auto& parameter =
          root_parameters_bindless[kRootParameter_Bindless_SamplerHeap];
      parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
      parameter.DescriptorTable.NumDescriptorRanges = 1;
      parameter.DescriptorTable.pDescriptorRanges =
          &root_bindless_sampler_range;
      parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
      root_bindless_sampler_range.RangeType =
          D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER;
      root_bindless_sampler_range.NumDescriptors = UINT_MAX;
      root_bindless_sampler_range.BaseShaderRegister = 0;
      root_bindless_sampler_range.RegisterSpace = 0;
      root_bindless_sampler_range.OffsetInDescriptorsFromTableStart = 0;
    }
    // View heap.
    D3D12_DESCRIPTOR_RANGE root_bindless_view_ranges[6];
    {
      auto& parameter =
          root_parameters_bindless[kRootParameter_Bindless_ViewHeap];
      parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
      // Will be appending.
      parameter.DescriptorTable.NumDescriptorRanges = 0;
      parameter.DescriptorTable.pDescriptorRanges = root_bindless_view_ranges;
      parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
      // Shared memory SRV.
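      // The ranges below point at descriptors written once at setup time into
      // the first SystemBindlessView::kCount slots of the bindless view heap,
      // which is what allows constant OffsetInDescriptorsFromTableStart
      // values - the table is always bound at the start of the heap.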
{ assert_true(parameter.DescriptorTable.NumDescriptorRanges < xe::countof(root_bindless_view_ranges)); auto& range = root_bindless_view_ranges[parameter.DescriptorTable .NumDescriptorRanges++]; range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV; range.NumDescriptors = 1; range.BaseShaderRegister = UINT(DxbcShaderTranslator::SRVMainRegister::kSharedMemory); range.RegisterSpace = UINT(DxbcShaderTranslator::SRVSpace::kMain); range.OffsetInDescriptorsFromTableStart = UINT(SystemBindlessView::kSharedMemoryRawSRV); } // Shared memory UAV. { assert_true(parameter.DescriptorTable.NumDescriptorRanges < xe::countof(root_bindless_view_ranges)); auto& range = root_bindless_view_ranges[parameter.DescriptorTable .NumDescriptorRanges++]; range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_UAV; range.NumDescriptors = 1; range.BaseShaderRegister = UINT(DxbcShaderTranslator::UAVRegister::kSharedMemory); range.RegisterSpace = 0; range.OffsetInDescriptorsFromTableStart = UINT(SystemBindlessView::kSharedMemoryRawUAV); } // EDRAM. if (render_target_cache_->GetPath() == RenderTargetCache::Path::kPixelShaderInterlock) { assert_true(parameter.DescriptorTable.NumDescriptorRanges < xe::countof(root_bindless_view_ranges)); auto& range = root_bindless_view_ranges[parameter.DescriptorTable .NumDescriptorRanges++]; range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_UAV; range.NumDescriptors = 1; range.BaseShaderRegister = UINT(DxbcShaderTranslator::UAVRegister::kEdram); range.RegisterSpace = 0; range.OffsetInDescriptorsFromTableStart = UINT(SystemBindlessView::kEdramR32UintUAV); } // Used UAV and SRV ranges must not overlap on Nvidia Fermi, so textures // have OffsetInDescriptorsFromTableStart after all static descriptors of // other types. // 2D array textures. { assert_true(parameter.DescriptorTable.NumDescriptorRanges < xe::countof(root_bindless_view_ranges)); auto& range = root_bindless_view_ranges[parameter.DescriptorTable .NumDescriptorRanges++]; range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV; range.NumDescriptors = UINT_MAX; range.BaseShaderRegister = 0; range.RegisterSpace = UINT(DxbcShaderTranslator::SRVSpace::kBindlessTextures2DArray); range.OffsetInDescriptorsFromTableStart = UINT(SystemBindlessView::kUnboundedSRVsStart); } // 3D textures. { assert_true(parameter.DescriptorTable.NumDescriptorRanges < xe::countof(root_bindless_view_ranges)); auto& range = root_bindless_view_ranges[parameter.DescriptorTable .NumDescriptorRanges++]; range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV; range.NumDescriptors = UINT_MAX; range.BaseShaderRegister = 0; range.RegisterSpace = UINT(DxbcShaderTranslator::SRVSpace::kBindlessTextures3D); range.OffsetInDescriptorsFromTableStart = UINT(SystemBindlessView::kUnboundedSRVsStart); } // Cube textures. 
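      // Like the 2D array and 3D ranges above, this range starts at
      // kUnboundedSRVsStart: all three unbounded ranges overlay the same heap
      // region in different register spaces, and shaders select the dimension
      // through the bindless descriptor index constants.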
      {
        assert_true(parameter.DescriptorTable.NumDescriptorRanges <
                    xe::countof(root_bindless_view_ranges));
        auto& range =
            root_bindless_view_ranges[parameter.DescriptorTable
                                          .NumDescriptorRanges++];
        range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV;
        range.NumDescriptors = UINT_MAX;
        range.BaseShaderRegister = 0;
        range.RegisterSpace =
            UINT(DxbcShaderTranslator::SRVSpace::kBindlessTexturesCube);
        range.OffsetInDescriptorsFromTableStart =
            UINT(SystemBindlessView::kUnboundedSRVsStart);
      }
    }
    root_signature_bindless_vs_ = ui::d3d12::util::CreateRootSignature(
        provider, root_signature_bindless_desc);
    if (!root_signature_bindless_vs_) {
      XELOGE(
          "Failed to create the global root signature for bindless resources, "
          "the version for use without tessellation");
      return false;
    }
    root_parameters_bindless[kRootParameter_Bindless_FloatConstantsVertex]
        .ShaderVisibility = D3D12_SHADER_VISIBILITY_DOMAIN;
    root_parameters_bindless[kRootParameter_Bindless_DescriptorIndicesVertex]
        .ShaderVisibility = D3D12_SHADER_VISIBILITY_DOMAIN;
    root_signature_bindless_ds_ = ui::d3d12::util::CreateRootSignature(
        provider, root_signature_bindless_desc);
    if (!root_signature_bindless_ds_) {
      XELOGE(
          "Failed to create the global root signature for bindless resources, "
          "the version for use with tessellation");
      return false;
    }
  }

  primitive_processor_ = std::make_unique<D3D12PrimitiveProcessor>(
      *register_file_, *memory_, trace_writer_, *shared_memory_, *this);
  if (!primitive_processor_->Initialize()) {
    XELOGE("Failed to initialize the geometric primitive processor");
    return false;
  }

  texture_cache_ = D3D12TextureCache::Create(
      *register_file_, *shared_memory_, draw_resolution_scale_x,
      draw_resolution_scale_y, *this, bindless_resources_used_);
  if (!texture_cache_) {
    XELOGE("Failed to initialize the texture cache");
    return false;
  }

  pipeline_cache_ = std::make_unique<PipelineCache>(
      *this, *register_file_, *render_target_cache_,
      bindless_resources_used_);
  if (!pipeline_cache_->Initialize()) {
    XELOGE("Failed to initialize the graphics pipeline cache");
    return false;
  }

  D3D12_HEAP_FLAGS heap_flag_create_not_zeroed =
      provider.GetHeapFlagCreateNotZeroed();

  // Create gamma ramp resources.
  gamma_ramp_256_entry_table_up_to_date_ = false;
  gamma_ramp_pwl_up_to_date_ = false;
  D3D12_RESOURCE_DESC gamma_ramp_buffer_desc;
  // 256 4-byte table entries, followed by 128 4-byte piecewise-linear ramp
  // values for each of the three channels.
  ui::d3d12::util::FillBufferResourceDesc(
      gamma_ramp_buffer_desc, (256 + 128 * 3) * 4, D3D12_RESOURCE_FLAG_NONE);
  // The first action will be uploading.
  gamma_ramp_buffer_state_ = D3D12_RESOURCE_STATE_COPY_DEST;
  if (FAILED(device->CreateCommittedResource(
          &ui::d3d12::util::kHeapPropertiesDefault,
          heap_flag_create_not_zeroed, &gamma_ramp_buffer_desc,
          gamma_ramp_buffer_state_, nullptr,
          IID_PPV_ARGS(&gamma_ramp_buffer_)))) {
    XELOGE("Failed to create the gamma ramp buffer");
    return false;
  }
  // The upload buffer is frame-buffered.
  gamma_ramp_buffer_desc.Width *= kQueueFrames;
  if (FAILED(device->CreateCommittedResource(
          &ui::d3d12::util::kHeapPropertiesUpload,
          heap_flag_create_not_zeroed, &gamma_ramp_buffer_desc,
          D3D12_RESOURCE_STATE_GENERIC_READ, nullptr,
          IID_PPV_ARGS(&gamma_ramp_upload_buffer_)))) {
    XELOGE("Failed to create the gamma ramp upload buffer");
    return false;
  }
  if (FAILED(gamma_ramp_upload_buffer_->Map(
          0, nullptr,
          reinterpret_cast<void**>(&gamma_ramp_upload_buffer_mapping_)))) {
    XELOGE("Failed to map the gamma ramp upload buffer");
    gamma_ramp_upload_buffer_mapping_ = nullptr;
    return false;
  }

  // Initialize compute pipelines for output with gamma ramp.
D3D12_ROOT_PARAMETER apply_gamma_root_parameters[UINT(ApplyGammaRootParameter::kCount)]; { D3D12_ROOT_PARAMETER& apply_gamma_root_parameter_constants = apply_gamma_root_parameters[UINT(ApplyGammaRootParameter::kConstants)]; apply_gamma_root_parameter_constants.ParameterType = D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS; apply_gamma_root_parameter_constants.Constants.ShaderRegister = 0; apply_gamma_root_parameter_constants.Constants.RegisterSpace = 0; apply_gamma_root_parameter_constants.Constants.Num32BitValues = sizeof(ApplyGammaConstants) / sizeof(uint32_t); apply_gamma_root_parameter_constants.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; } D3D12_DESCRIPTOR_RANGE apply_gamma_root_descriptor_range_dest; apply_gamma_root_descriptor_range_dest.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_UAV; apply_gamma_root_descriptor_range_dest.NumDescriptors = 1; apply_gamma_root_descriptor_range_dest.BaseShaderRegister = 0; apply_gamma_root_descriptor_range_dest.RegisterSpace = 0; apply_gamma_root_descriptor_range_dest.OffsetInDescriptorsFromTableStart = 0; { D3D12_ROOT_PARAMETER& apply_gamma_root_parameter_dest = apply_gamma_root_parameters[UINT( ApplyGammaRootParameter::kDestination)]; apply_gamma_root_parameter_dest.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; apply_gamma_root_parameter_dest.DescriptorTable.NumDescriptorRanges = 1; apply_gamma_root_parameter_dest.DescriptorTable.pDescriptorRanges = &apply_gamma_root_descriptor_range_dest; apply_gamma_root_parameter_dest.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; } D3D12_DESCRIPTOR_RANGE apply_gamma_root_descriptor_range_source; apply_gamma_root_descriptor_range_source.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV; apply_gamma_root_descriptor_range_source.NumDescriptors = 1; apply_gamma_root_descriptor_range_source.BaseShaderRegister = 1; apply_gamma_root_descriptor_range_source.RegisterSpace = 0; apply_gamma_root_descriptor_range_source.OffsetInDescriptorsFromTableStart = 0; { D3D12_ROOT_PARAMETER& apply_gamma_root_parameter_source = apply_gamma_root_parameters[UINT(ApplyGammaRootParameter::kSource)]; apply_gamma_root_parameter_source.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; apply_gamma_root_parameter_source.DescriptorTable.NumDescriptorRanges = 1; apply_gamma_root_parameter_source.DescriptorTable.pDescriptorRanges = &apply_gamma_root_descriptor_range_source; apply_gamma_root_parameter_source.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; } D3D12_DESCRIPTOR_RANGE apply_gamma_root_descriptor_range_ramp; apply_gamma_root_descriptor_range_ramp.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV; apply_gamma_root_descriptor_range_ramp.NumDescriptors = 1; apply_gamma_root_descriptor_range_ramp.BaseShaderRegister = 0; apply_gamma_root_descriptor_range_ramp.RegisterSpace = 0; apply_gamma_root_descriptor_range_ramp.OffsetInDescriptorsFromTableStart = 0; { D3D12_ROOT_PARAMETER& apply_gamma_root_parameter_gamma_ramp = apply_gamma_root_parameters[UINT(ApplyGammaRootParameter::kRamp)]; apply_gamma_root_parameter_gamma_ramp.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; apply_gamma_root_parameter_gamma_ramp.DescriptorTable.NumDescriptorRanges = 1; apply_gamma_root_parameter_gamma_ramp.DescriptorTable.pDescriptorRanges = &apply_gamma_root_descriptor_range_ramp; apply_gamma_root_parameter_gamma_ramp.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; } D3D12_ROOT_SIGNATURE_DESC apply_gamma_root_signature_desc; apply_gamma_root_signature_desc.NumParameters = UINT(ApplyGammaRootParameter::kCount); 
apply_gamma_root_signature_desc.pParameters = apply_gamma_root_parameters; apply_gamma_root_signature_desc.NumStaticSamplers = 0; apply_gamma_root_signature_desc.pStaticSamplers = nullptr; apply_gamma_root_signature_desc.Flags = D3D12_ROOT_SIGNATURE_FLAG_NONE; *(apply_gamma_root_signature_.ReleaseAndGetAddressOf()) = ui::d3d12::util::CreateRootSignature(provider, apply_gamma_root_signature_desc); if (!apply_gamma_root_signature_) { XELOGE("Failed to create the gamma ramp application root signature"); return false; } *(apply_gamma_table_pipeline_.ReleaseAndGetAddressOf()) = ui::d3d12::util::CreateComputePipeline( device, shaders::apply_gamma_table_cs, sizeof(shaders::apply_gamma_table_cs), apply_gamma_root_signature_.Get()); if (!apply_gamma_table_pipeline_) { XELOGE( "Failed to create the 256-entry table gamma ramp application compute " "pipeline"); return false; } *(apply_gamma_table_fxaa_luma_pipeline_.ReleaseAndGetAddressOf()) = ui::d3d12::util::CreateComputePipeline( device, shaders::apply_gamma_table_fxaa_luma_cs, sizeof(shaders::apply_gamma_table_fxaa_luma_cs), apply_gamma_root_signature_.Get()); if (!apply_gamma_table_fxaa_luma_pipeline_) { XELOGE( "Failed to create the 256-entry table gamma ramp application compute " "pipeline with perceptual luma output"); return false; } *(apply_gamma_pwl_pipeline_.ReleaseAndGetAddressOf()) = ui::d3d12::util::CreateComputePipeline( device, shaders::apply_gamma_pwl_cs, sizeof(shaders::apply_gamma_pwl_cs), apply_gamma_root_signature_.Get()); if (!apply_gamma_pwl_pipeline_) { XELOGE("Failed to create the PWL gamma ramp application compute pipeline"); return false; } *(apply_gamma_pwl_fxaa_luma_pipeline_.ReleaseAndGetAddressOf()) = ui::d3d12::util::CreateComputePipeline( device, shaders::apply_gamma_pwl_fxaa_luma_cs, sizeof(shaders::apply_gamma_pwl_fxaa_luma_cs), apply_gamma_root_signature_.Get()); if (!apply_gamma_pwl_fxaa_luma_pipeline_) { XELOGE( "Failed to create the PWL gamma ramp application compute pipeline with " "perceptual luma output"); return false; } // Initialize compute pipelines for post-processing anti-aliasing. 
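  // FXAA reads the source texture with bilinear filtering and clamp-to-edge
  // addressing, so the root signature below embeds that sampler statically
  // instead of taking one from a descriptor heap.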
  D3D12_ROOT_PARAMETER fxaa_root_parameters[UINT(FxaaRootParameter::kCount)];
  {
    D3D12_ROOT_PARAMETER& fxaa_root_parameter_constants =
        fxaa_root_parameters[UINT(FxaaRootParameter::kConstants)];
    fxaa_root_parameter_constants.ParameterType =
        D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS;
    fxaa_root_parameter_constants.Constants.ShaderRegister = 0;
    fxaa_root_parameter_constants.Constants.RegisterSpace = 0;
    fxaa_root_parameter_constants.Constants.Num32BitValues =
        sizeof(FxaaConstants) / sizeof(uint32_t);
    fxaa_root_parameter_constants.ShaderVisibility =
        D3D12_SHADER_VISIBILITY_ALL;
  }
  D3D12_DESCRIPTOR_RANGE fxaa_root_descriptor_range_dest;
  fxaa_root_descriptor_range_dest.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_UAV;
  fxaa_root_descriptor_range_dest.NumDescriptors = 1;
  fxaa_root_descriptor_range_dest.BaseShaderRegister = 0;
  fxaa_root_descriptor_range_dest.RegisterSpace = 0;
  fxaa_root_descriptor_range_dest.OffsetInDescriptorsFromTableStart = 0;
  {
    D3D12_ROOT_PARAMETER& fxaa_root_parameter_dest =
        fxaa_root_parameters[UINT(FxaaRootParameter::kDestination)];
    fxaa_root_parameter_dest.ParameterType =
        D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
    fxaa_root_parameter_dest.DescriptorTable.NumDescriptorRanges = 1;
    fxaa_root_parameter_dest.DescriptorTable.pDescriptorRanges =
        &fxaa_root_descriptor_range_dest;
    fxaa_root_parameter_dest.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
  }
  D3D12_DESCRIPTOR_RANGE fxaa_root_descriptor_range_source;
  fxaa_root_descriptor_range_source.RangeType =
      D3D12_DESCRIPTOR_RANGE_TYPE_SRV;
  fxaa_root_descriptor_range_source.NumDescriptors = 1;
  fxaa_root_descriptor_range_source.BaseShaderRegister = 0;
  fxaa_root_descriptor_range_source.RegisterSpace = 0;
  fxaa_root_descriptor_range_source.OffsetInDescriptorsFromTableStart = 0;
  {
    D3D12_ROOT_PARAMETER& fxaa_root_parameter_source =
        fxaa_root_parameters[UINT(FxaaRootParameter::kSource)];
    fxaa_root_parameter_source.ParameterType =
        D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
    fxaa_root_parameter_source.DescriptorTable.NumDescriptorRanges = 1;
    fxaa_root_parameter_source.DescriptorTable.pDescriptorRanges =
        &fxaa_root_descriptor_range_source;
    fxaa_root_parameter_source.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
  }
  D3D12_STATIC_SAMPLER_DESC fxaa_root_sampler;
  fxaa_root_sampler.Filter = D3D12_FILTER_MIN_MAG_MIP_LINEAR;
  fxaa_root_sampler.AddressU = D3D12_TEXTURE_ADDRESS_MODE_CLAMP;
  fxaa_root_sampler.AddressV = D3D12_TEXTURE_ADDRESS_MODE_CLAMP;
  fxaa_root_sampler.AddressW = D3D12_TEXTURE_ADDRESS_MODE_CLAMP;
  fxaa_root_sampler.MipLODBias = 0.0f;
  fxaa_root_sampler.MaxAnisotropy = 1;
  fxaa_root_sampler.ComparisonFunc = D3D12_COMPARISON_FUNC_NEVER;
  fxaa_root_sampler.BorderColor = D3D12_STATIC_BORDER_COLOR_OPAQUE_BLACK;
  fxaa_root_sampler.MinLOD = 0.0f;
  fxaa_root_sampler.MaxLOD = 0.0f;
  fxaa_root_sampler.ShaderRegister = 0;
  fxaa_root_sampler.RegisterSpace = 0;
  fxaa_root_sampler.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
  D3D12_ROOT_SIGNATURE_DESC fxaa_root_signature_desc;
  fxaa_root_signature_desc.NumParameters = UINT(FxaaRootParameter::kCount);
  fxaa_root_signature_desc.pParameters = fxaa_root_parameters;
  fxaa_root_signature_desc.NumStaticSamplers = 1;
  fxaa_root_signature_desc.pStaticSamplers = &fxaa_root_sampler;
  fxaa_root_signature_desc.Flags = D3D12_ROOT_SIGNATURE_FLAG_NONE;
  *(fxaa_root_signature_.ReleaseAndGetAddressOf()) =
      ui::d3d12::util::CreateRootSignature(provider, fxaa_root_signature_desc);
  if (!fxaa_root_signature_) {
    XELOGE("Failed to create the FXAA root signature");
    return false;
  }
  *(fxaa_pipeline_.ReleaseAndGetAddressOf()) =
      ui::d3d12::util::CreateComputePipeline(device, shaders::fxaa_cs,
                                             sizeof(shaders::fxaa_cs),
                                             fxaa_root_signature_.Get());
  if (!fxaa_pipeline_) {
    XELOGE("Failed to create the FXAA compute pipeline");
    return false;
  }
  *(fxaa_extreme_pipeline_.ReleaseAndGetAddressOf()) =
      ui::d3d12::util::CreateComputePipeline(device, shaders::fxaa_extreme_cs,
                                             sizeof(shaders::fxaa_extreme_cs),
                                             fxaa_root_signature_.Get());
  if (!fxaa_extreme_pipeline_) {
    XELOGE("Failed to create the extreme-quality FXAA compute pipeline");
    return false;
  }

  if (bindless_resources_used_) {
    // Create the system bindless descriptors once all resources are
    // initialized.
    // kNullTexture2DArray.
    D3D12_SHADER_RESOURCE_VIEW_DESC null_srv_desc;
    null_srv_desc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
    null_srv_desc.Shader4ComponentMapping =
        D3D12_ENCODE_SHADER_4_COMPONENT_MAPPING(
            D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_0,
            D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_0,
            D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_0,
            D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_0);
    null_srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2DARRAY;
    null_srv_desc.Texture2DArray.MostDetailedMip = 0;
    null_srv_desc.Texture2DArray.MipLevels = 1;
    null_srv_desc.Texture2DArray.FirstArraySlice = 0;
    null_srv_desc.Texture2DArray.ArraySize = 1;
    null_srv_desc.Texture2DArray.PlaneSlice = 0;
    null_srv_desc.Texture2DArray.ResourceMinLODClamp = 0.0f;
    device->CreateShaderResourceView(
        nullptr, &null_srv_desc,
        provider.OffsetViewDescriptor(
            view_bindless_heap_cpu_start_,
            uint32_t(SystemBindlessView::kNullTexture2DArray)));
    // kNullTexture3D.
    null_srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE3D;
    null_srv_desc.Texture3D.MostDetailedMip = 0;
    null_srv_desc.Texture3D.MipLevels = 1;
    null_srv_desc.Texture3D.ResourceMinLODClamp = 0.0f;
    device->CreateShaderResourceView(
        nullptr, &null_srv_desc,
        provider.OffsetViewDescriptor(
            view_bindless_heap_cpu_start_,
            uint32_t(SystemBindlessView::kNullTexture3D)));
    // kNullTextureCube.
    null_srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURECUBE;
    null_srv_desc.TextureCube.MostDetailedMip = 0;
    null_srv_desc.TextureCube.MipLevels = 1;
    null_srv_desc.TextureCube.ResourceMinLODClamp = 0.0f;
    device->CreateShaderResourceView(
        nullptr, &null_srv_desc,
        provider.OffsetViewDescriptor(
            view_bindless_heap_cpu_start_,
            uint32_t(SystemBindlessView::kNullTextureCube)));
    // kSharedMemoryRawSRV.
    shared_memory_->WriteRawSRVDescriptor(provider.OffsetViewDescriptor(
        view_bindless_heap_cpu_start_,
        uint32_t(SystemBindlessView::kSharedMemoryRawSRV)));
    // kSharedMemoryR32UintSRV.
    shared_memory_->WriteUintPow2SRVDescriptor(
        provider.OffsetViewDescriptor(
            view_bindless_heap_cpu_start_,
            uint32_t(SystemBindlessView::kSharedMemoryR32UintSRV)),
        2);
    // kSharedMemoryR32G32UintSRV.
    shared_memory_->WriteUintPow2SRVDescriptor(
        provider.OffsetViewDescriptor(
            view_bindless_heap_cpu_start_,
            uint32_t(SystemBindlessView::kSharedMemoryR32G32UintSRV)),
        3);
    // kSharedMemoryR32G32B32A32UintSRV.
    shared_memory_->WriteUintPow2SRVDescriptor(
        provider.OffsetViewDescriptor(
            view_bindless_heap_cpu_start_,
            uint32_t(SystemBindlessView::kSharedMemoryR32G32B32A32UintSRV)),
        4);
    // kSharedMemoryRawUAV.
    shared_memory_->WriteRawUAVDescriptor(provider.OffsetViewDescriptor(
        view_bindless_heap_cpu_start_,
        uint32_t(SystemBindlessView::kSharedMemoryRawUAV)));
    // kSharedMemoryR32UintUAV.
    shared_memory_->WriteUintPow2UAVDescriptor(
        provider.OffsetViewDescriptor(
            view_bindless_heap_cpu_start_,
            uint32_t(SystemBindlessView::kSharedMemoryR32UintUAV)),
        2);
    // kSharedMemoryR32G32UintUAV.
shared_memory_->WriteUintPow2UAVDescriptor( provider.OffsetViewDescriptor( view_bindless_heap_cpu_start_, uint32_t(SystemBindlessView::kSharedMemoryR32G32UintUAV)), 3); // kSharedMemoryR32G32B32A32UintUAV. shared_memory_->WriteUintPow2UAVDescriptor( provider.OffsetViewDescriptor( view_bindless_heap_cpu_start_, uint32_t(SystemBindlessView::kSharedMemoryR32G32B32A32UintUAV)), 4); // kEdramRawSRV. render_target_cache_->WriteEdramRawSRVDescriptor( provider.OffsetViewDescriptor( view_bindless_heap_cpu_start_, uint32_t(SystemBindlessView::kEdramRawSRV))); // kEdramR32UintSRV. render_target_cache_->WriteEdramUintPow2SRVDescriptor( provider.OffsetViewDescriptor( view_bindless_heap_cpu_start_, uint32_t(SystemBindlessView::kEdramR32UintSRV)), 2); // kEdramR32G32UintSRV. render_target_cache_->WriteEdramUintPow2SRVDescriptor( provider.OffsetViewDescriptor( view_bindless_heap_cpu_start_, uint32_t(SystemBindlessView::kEdramR32G32UintSRV)), 3); // kEdramR32G32B32A32UintSRV. render_target_cache_->WriteEdramUintPow2SRVDescriptor( provider.OffsetViewDescriptor( view_bindless_heap_cpu_start_, uint32_t(SystemBindlessView::kEdramR32G32B32A32UintSRV)), 4); // kEdramRawUAV. render_target_cache_->WriteEdramRawUAVDescriptor( provider.OffsetViewDescriptor( view_bindless_heap_cpu_start_, uint32_t(SystemBindlessView::kEdramRawUAV))); // kEdramR32UintUAV. render_target_cache_->WriteEdramUintPow2UAVDescriptor( provider.OffsetViewDescriptor( view_bindless_heap_cpu_start_, uint32_t(SystemBindlessView::kEdramR32UintUAV)), 2); // kEdramR32G32UintUAV. render_target_cache_->WriteEdramUintPow2UAVDescriptor( provider.OffsetViewDescriptor( view_bindless_heap_cpu_start_, uint32_t(SystemBindlessView::kEdramR32G32UintUAV)), 3); // kEdramR32G32B32A32UintUAV. render_target_cache_->WriteEdramUintPow2UAVDescriptor( provider.OffsetViewDescriptor( view_bindless_heap_cpu_start_, uint32_t(SystemBindlessView::kEdramR32G32B32A32UintUAV)), 4); // kGammaRampTableSRV. WriteGammaRampSRV(false, provider.OffsetViewDescriptor( view_bindless_heap_cpu_start_, uint32_t(SystemBindlessView::kGammaRampTableSRV))); // kGammaRampPWLSRV. WriteGammaRampSRV(true, provider.OffsetViewDescriptor( view_bindless_heap_cpu_start_, uint32_t(SystemBindlessView::kGammaRampPWLSRV))); } pix_capture_requested_.store(false, std::memory_order_relaxed); pix_capturing_ = false; // Just not to expose uninitialized memory. std::memset(&system_constants_, 0, sizeof(system_constants_)); return true; } void D3D12CommandProcessor::ShutdownContext() { AwaitAllQueueOperationsCompletion(); ui::d3d12::util::ReleaseAndNull(readback_buffer_); readback_buffer_size_ = 0; ui::d3d12::util::ReleaseAndNull(scratch_buffer_); scratch_buffer_size_ = 0; for (const std::pair& resource_for_deletion : resources_for_deletion_) { resource_for_deletion.second->Release(); } resources_for_deletion_.clear(); fxaa_source_texture_submission_ = 0; fxaa_source_texture_.Reset(); fxaa_extreme_pipeline_.Reset(); fxaa_pipeline_.Reset(); fxaa_root_signature_.Reset(); apply_gamma_pwl_fxaa_luma_pipeline_.Reset(); apply_gamma_pwl_pipeline_.Reset(); apply_gamma_table_fxaa_luma_pipeline_.Reset(); apply_gamma_table_pipeline_.Reset(); apply_gamma_root_signature_.Reset(); // Unmapping will be done implicitly by the destruction. gamma_ramp_upload_buffer_mapping_ = nullptr; gamma_ramp_upload_buffer_.Reset(); gamma_ramp_buffer_.Reset(); texture_cache_.reset(); pipeline_cache_.reset(); primitive_processor_.reset(); // Shut down binding - bindless descriptors may be owned by subsystems like // the texture cache. 
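// Illustrative aside: resources_for_deletion_, released in the loop above,
// pairs each transient resource with the submission index that last used it,
// so the resource is only freed once the GPU has passed that submission. A
// minimal sketch of the idea (compiled out; the type and member names here
// are hypothetical, not the actual members of this class):
#if 0
#include <cstdint>
#include <deque>
#include <utility>

struct DeferredDeletionQueueExample {
  std::deque<std::pair<uint64_t, ID3D12Resource*>> queue;

  // Called when a resource is retired while possibly still in flight.
  void Defer(uint64_t last_usage_submission, ID3D12Resource* resource) {
    resource->AddRef();
    queue.emplace_back(last_usage_submission, resource);
  }

  // Called after the submission fence value has been re-read.
  void Reclaim(uint64_t completed_submission) {
    while (!queue.empty() && queue.front().first <= completed_submission) {
      queue.front().second->Release();
      queue.pop_front();
    }
  }
};
#endif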
// Root signatures are used by pipelines, thus freed after the pipelines. ui::d3d12::util::ReleaseAndNull(root_signature_bindless_ds_); ui::d3d12::util::ReleaseAndNull(root_signature_bindless_vs_); for (auto it : root_signatures_bindful_) { it.second->Release(); } root_signatures_bindful_.clear(); if (bindless_resources_used_) { texture_cache_bindless_sampler_map_.clear(); for (const auto& sampler_bindless_heap_overflowed : sampler_bindless_heaps_overflowed_) { sampler_bindless_heap_overflowed.first->Release(); } sampler_bindless_heaps_overflowed_.clear(); sampler_bindless_heap_allocated_ = 0; ui::d3d12::util::ReleaseAndNull(sampler_bindless_heap_current_); view_bindless_one_use_descriptors_.clear(); view_bindless_heap_free_.clear(); ui::d3d12::util::ReleaseAndNull(view_bindless_heap_); } else { sampler_bindful_heap_pool_.reset(); view_bindful_heap_pool_.reset(); } constant_buffer_pool_.reset(); render_target_cache_.reset(); shared_memory_.reset(); deferred_command_list_.Reset(); ui::d3d12::util::ReleaseAndNull(command_list_1_); ui::d3d12::util::ReleaseAndNull(command_list_); ClearCommandAllocatorCache(); frame_open_ = false; frame_current_ = 1; frame_completed_ = 0; std::memset(closed_frame_submissions_, 0, sizeof(closed_frame_submissions_)); // First release the fences since they may reference fence_completion_event_. queue_operations_done_since_submission_signal_ = false; queue_operations_since_submission_fence_last_ = 0; ui::d3d12::util::ReleaseAndNull(queue_operations_since_submission_fence_); ui::d3d12::util::ReleaseAndNull(submission_fence_); submission_open_ = false; submission_current_ = 1; submission_completed_ = 0; if (fence_completion_event_) { CloseHandle(fence_completion_event_); fence_completion_event_ = nullptr; } device_removed_ = false; CommandProcessor::ShutdownContext(); } void D3D12CommandProcessor::WriteRegister(uint32_t index, uint32_t value) { CommandProcessor::WriteRegister(index, value); if (index >= XE_GPU_REG_SHADER_CONSTANT_000_X && index <= XE_GPU_REG_SHADER_CONSTANT_511_W) { if (frame_open_) { uint32_t float_constant_index = (index - XE_GPU_REG_SHADER_CONSTANT_000_X) >> 2; if (float_constant_index >= 256) { float_constant_index -= 256; if (current_float_constant_map_pixel_[float_constant_index >> 6] & (1ull << (float_constant_index & 63))) { cbuffer_binding_float_pixel_.up_to_date = false; } } else { if (current_float_constant_map_vertex_[float_constant_index >> 6] & (1ull << (float_constant_index & 63))) { cbuffer_binding_float_vertex_.up_to_date = false; } } } } else if (index >= XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031 && index <= XE_GPU_REG_SHADER_CONSTANT_LOOP_31) { cbuffer_binding_bool_loop_.up_to_date = false; } else if (index >= XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 && index <= XE_GPU_REG_SHADER_CONSTANT_FETCH_31_5) { cbuffer_binding_fetch_.up_to_date = false; if (texture_cache_ != nullptr) { texture_cache_->TextureFetchConstantWritten( (index - XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0) / 6); } } } void D3D12CommandProcessor::OnGammaRamp256EntryTableValueWritten() { gamma_ramp_256_entry_table_up_to_date_ = false; } void D3D12CommandProcessor::OnGammaRampPWLValueWritten() { gamma_ramp_pwl_up_to_date_ = false; } void D3D12CommandProcessor::IssueSwap(uint32_t frontbuffer_ptr, uint32_t frontbuffer_width, uint32_t frontbuffer_height) { SCOPE_profile_cpu_f("gpu"); ui::Presenter* presenter = graphics_system_->presenter(); if (!presenter) { return; } // In case the swap command is the only one in the frame. 
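// Illustrative aside: WriteRegister above consults a 256-bit map (four
// uint64_t words) to see whether the written float constant is actually
// referenced by the currently bound shader before invalidating the constant
// buffer binding, avoiding needless re-uploads. A compiled-out sketch of the
// bit test (the function name is hypothetical):
#if 0
#include <cstdint>

bool IsFloatConstantUsedExample(const uint64_t used_map[4],
                                uint32_t constant_index) {
  // Word index is the upper bits, bit index the lower 6 bits - exactly the
  // `map[index >> 6] & (1ull << (index & 63))` pattern in the code above.
  return (used_map[constant_index >> 6] &
          (uint64_t(1) << (constant_index & 63))) != 0;
}
#endif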
if (!BeginSubmission(true)) { return; } // Obtain the actual front buffer size to pass to RefreshGuestOutput, // resolution-scaled if it's a resolve destination, or not otherwise. D3D12_SHADER_RESOURCE_VIEW_DESC swap_texture_srv_desc; xenos::TextureFormat frontbuffer_format; ID3D12Resource* swap_texture_resource = texture_cache_->RequestSwapTexture( swap_texture_srv_desc, frontbuffer_format); if (!swap_texture_resource) { return; } D3D12_RESOURCE_DESC swap_texture_desc = swap_texture_resource->GetDesc(); presenter->RefreshGuestOutput( uint32_t(swap_texture_desc.Width), uint32_t(swap_texture_desc.Height), 1280, 720, [this, &swap_texture_srv_desc, frontbuffer_format, swap_texture_resource, &swap_texture_desc]( ui::Presenter::GuestOutputRefreshContext& context) -> bool { const ui::d3d12::D3D12Provider& provider = GetD3D12Provider(); ID3D12Device* device = provider.GetDevice(); SwapPostEffect swap_post_effect = GetActualSwapPostEffect(); bool use_fxaa = swap_post_effect == SwapPostEffect::kFxaa || swap_post_effect == SwapPostEffect::kFxaaExtreme; if (use_fxaa) { // Make sure the texture of the correct size is available for FXAA. if (fxaa_source_texture_) { D3D12_RESOURCE_DESC fxaa_source_texture_desc = fxaa_source_texture_->GetDesc(); if (fxaa_source_texture_desc.Width != swap_texture_desc.Width || fxaa_source_texture_desc.Height != swap_texture_desc.Height) { if (submission_completed_ < fxaa_source_texture_submission_) { fxaa_source_texture_->AddRef(); resources_for_deletion_.emplace_back( fxaa_source_texture_submission_, fxaa_source_texture_.Get()); } fxaa_source_texture_.Reset(); fxaa_source_texture_submission_ = 0; } } if (!fxaa_source_texture_) { D3D12_RESOURCE_DESC fxaa_source_texture_desc; fxaa_source_texture_desc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D; fxaa_source_texture_desc.Alignment = 0; fxaa_source_texture_desc.Width = swap_texture_desc.Width; fxaa_source_texture_desc.Height = swap_texture_desc.Height; fxaa_source_texture_desc.DepthOrArraySize = 1; fxaa_source_texture_desc.MipLevels = 1; fxaa_source_texture_desc.Format = kFxaaSourceTextureFormat; fxaa_source_texture_desc.SampleDesc.Count = 1; fxaa_source_texture_desc.SampleDesc.Quality = 0; fxaa_source_texture_desc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN; fxaa_source_texture_desc.Flags = D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; if (FAILED(device->CreateCommittedResource( &ui::d3d12::util::kHeapPropertiesDefault, provider.GetHeapFlagCreateNotZeroed(), &fxaa_source_texture_desc, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE, nullptr, IID_PPV_ARGS(&fxaa_source_texture_)))) { XELOGE("Failed to create the FXAA input texture"); swap_post_effect = SwapPostEffect::kNone; use_fxaa = false; } } } // This is according to D3D::InitializePresentationParameters from a // game executable, which initializes the 256-entry table gamma ramp for // 8_8_8_8 output and the PWL gamma ramp for 2_10_10_10. // TODO(Triang3l): Choose between the table and PWL based on // DC_LUTA_CONTROL, support both for all formats (and also different // increments for PWL). bool use_pwl_gamma_ramp = frontbuffer_format == xenos::TextureFormat::k_2_10_10_10 || frontbuffer_format == xenos::TextureFormat::k_2_10_10_10_AS_16_16_16_16; context.SetIs8bpc(!use_pwl_gamma_ramp && !use_fxaa); // Upload the new gamma ramp, using the upload buffer for the current // frame (will close the frame after this anyway, so can't write // multiple times per frame). if (!(use_pwl_gamma_ramp ? 
                  gamma_ramp_pwl_up_to_date_
                  : gamma_ramp_256_entry_table_up_to_date_)) {
            uint32_t gamma_ramp_offset_bytes = use_pwl_gamma_ramp ? 256 * 4 : 0;
            uint32_t gamma_ramp_upload_offset_bytes =
                uint32_t(frame_current_ % kQueueFrames) *
                    ((256 + 128 * 3) * 4) +
                gamma_ramp_offset_bytes;
            uint32_t gamma_ramp_size_bytes =
                (use_pwl_gamma_ramp ? 128 * 3 : 256) * 4;
            if (std::endian::native != std::endian::little &&
                use_pwl_gamma_ramp) {
              // R16G16 is first R16, where the shader expects the base, and
              // second G16, where the delta should be, but
              // gamma_ramp_pwl_rgb() is an array of 32-bit DC_LUT_PWL_DATA
              // registers - swap 16 bits in each 32.
              auto gamma_ramp_pwl_upload_buffer =
                  reinterpret_cast<reg::DC_LUT_PWL_DATA*>(
                      gamma_ramp_upload_buffer_mapping_ +
                      gamma_ramp_upload_offset_bytes);
              const reg::DC_LUT_PWL_DATA* gamma_ramp_pwl =
                  gamma_ramp_pwl_rgb();
              for (size_t i = 0; i < 128 * 3; ++i) {
                reg::DC_LUT_PWL_DATA& gamma_ramp_pwl_upload_buffer_entry =
                    gamma_ramp_pwl_upload_buffer[i];
                reg::DC_LUT_PWL_DATA gamma_ramp_pwl_entry = gamma_ramp_pwl[i];
                gamma_ramp_pwl_upload_buffer_entry.base =
                    gamma_ramp_pwl_entry.delta;
                gamma_ramp_pwl_upload_buffer_entry.delta =
                    gamma_ramp_pwl_entry.base;
              }
            } else {
              std::memcpy(gamma_ramp_upload_buffer_mapping_ +
                              gamma_ramp_upload_offset_bytes,
                          use_pwl_gamma_ramp
                              ? static_cast<const void*>(gamma_ramp_pwl_rgb())
                              : static_cast<const void*>(
                                    gamma_ramp_256_entry_table()),
                          gamma_ramp_size_bytes);
            }
            PushTransitionBarrier(gamma_ramp_buffer_.Get(),
                                  gamma_ramp_buffer_state_,
                                  D3D12_RESOURCE_STATE_COPY_DEST);
            gamma_ramp_buffer_state_ = D3D12_RESOURCE_STATE_COPY_DEST;
            SubmitBarriers();
            deferred_command_list_.D3DCopyBufferRegion(
                gamma_ramp_buffer_.Get(), gamma_ramp_offset_bytes,
                gamma_ramp_upload_buffer_.Get(),
                gamma_ramp_upload_offset_bytes, gamma_ramp_size_bytes);
            (use_pwl_gamma_ramp ? gamma_ramp_pwl_up_to_date_
                                : gamma_ramp_256_entry_table_up_to_date_) =
                true;
          }

          // Destination, source, and if bindful, gamma ramp.
          ui::d3d12::util::DescriptorCpuGpuHandlePair
              apply_gamma_descriptors[3];
          ui::d3d12::util::DescriptorCpuGpuHandlePair
              apply_gamma_descriptor_gamma_ramp;
          if (!RequestOneUseSingleViewDescriptors(
                  bindless_resources_used_ ? 2 : 3, apply_gamma_descriptors)) {
            return false;
          }
          // Must not call anything that can change the descriptor heap from
          // now on!
          if (bindless_resources_used_) {
            apply_gamma_descriptor_gamma_ramp = GetSystemBindlessViewHandlePair(
                use_pwl_gamma_ramp ? SystemBindlessView::kGammaRampPWLSRV
                                   : SystemBindlessView::kGammaRampTableSRV);
          } else {
            apply_gamma_descriptor_gamma_ramp = apply_gamma_descriptors[2];
            WriteGammaRampSRV(use_pwl_gamma_ramp,
                              apply_gamma_descriptor_gamma_ramp.first);
          }

          ID3D12Resource* guest_output_resource =
              static_cast<
                  ui::d3d12::D3D12Presenter::D3D12GuestOutputRefreshContext&>(
                  context)
                  .resource_uav_capable();
          if (use_fxaa) {
            fxaa_source_texture_submission_ = submission_current_;
          }
          ID3D12Resource* apply_gamma_dest =
              use_fxaa ? fxaa_source_texture_.Get() : guest_output_resource;
          D3D12_RESOURCE_STATES apply_gamma_dest_initial_state =
              use_fxaa ? D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE
                       : ui::d3d12::D3D12Presenter::kGuestOutputInternalState;
          PushTransitionBarrier(apply_gamma_dest,
                                apply_gamma_dest_initial_state,
                                D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
          // From now on, even in case of failure, apply_gamma_dest must be
          // transitioned back to apply_gamma_dest_initial_state!
          D3D12_UNORDERED_ACCESS_VIEW_DESC apply_gamma_dest_uav_desc;
          apply_gamma_dest_uav_desc.Format =
              use_fxaa ?
kFxaaSourceTextureFormat : ui::d3d12::D3D12Presenter::kGuestOutputFormat; apply_gamma_dest_uav_desc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE2D; apply_gamma_dest_uav_desc.Texture2D.MipSlice = 0; apply_gamma_dest_uav_desc.Texture2D.PlaneSlice = 0; device->CreateUnorderedAccessView(apply_gamma_dest, nullptr, &apply_gamma_dest_uav_desc, apply_gamma_descriptors[0].first); device->CreateShaderResourceView(swap_texture_resource, &swap_texture_srv_desc, apply_gamma_descriptors[1].first); PushTransitionBarrier(gamma_ramp_buffer_.Get(), gamma_ramp_buffer_state_, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE); gamma_ramp_buffer_state_ = D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE; deferred_command_list_.D3DSetComputeRootSignature( apply_gamma_root_signature_.Get()); ApplyGammaConstants apply_gamma_constants; apply_gamma_constants.size[0] = uint32_t(swap_texture_desc.Width); apply_gamma_constants.size[1] = uint32_t(swap_texture_desc.Height); deferred_command_list_.D3DSetComputeRoot32BitConstants( UINT(ApplyGammaRootParameter::kConstants), sizeof(apply_gamma_constants) / sizeof(uint32_t), &apply_gamma_constants, 0); deferred_command_list_.D3DSetComputeRootDescriptorTable( UINT(ApplyGammaRootParameter::kDestination), apply_gamma_descriptors[0].second); deferred_command_list_.D3DSetComputeRootDescriptorTable( UINT(ApplyGammaRootParameter::kSource), apply_gamma_descriptors[1].second); deferred_command_list_.D3DSetComputeRootDescriptorTable( UINT(ApplyGammaRootParameter::kRamp), apply_gamma_descriptor_gamma_ramp.second); ID3D12PipelineState* apply_gamma_pipeline; if (use_pwl_gamma_ramp) { apply_gamma_pipeline = use_fxaa ? apply_gamma_pwl_fxaa_luma_pipeline_.Get() : apply_gamma_pwl_pipeline_.Get(); } else { apply_gamma_pipeline = use_fxaa ? apply_gamma_table_fxaa_luma_pipeline_.Get() : apply_gamma_table_pipeline_.Get(); } SetExternalPipeline(apply_gamma_pipeline); SubmitBarriers(); uint32_t group_count_x = (uint32_t(swap_texture_desc.Width) + 15) / 16; uint32_t group_count_y = (uint32_t(swap_texture_desc.Height) + 7) / 8; deferred_command_list_.D3DDispatch(group_count_x, group_count_y, 1); // Apply FXAA. if (use_fxaa) { // Destination and source. ui::d3d12::util::DescriptorCpuGpuHandlePair fxaa_descriptors[2]; if (!RequestOneUseSingleViewDescriptors( uint32_t(xe::countof(fxaa_descriptors)), fxaa_descriptors)) { // Failed to obtain descriptors for FXAA - just copy after gamma // ramp application without applying FXAA. PushTransitionBarrier(apply_gamma_dest, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_COPY_SOURCE); PushTransitionBarrier( guest_output_resource, ui::d3d12::D3D12Presenter::kGuestOutputInternalState, D3D12_RESOURCE_STATE_COPY_DEST); SubmitBarriers(); deferred_command_list_.D3DCopyResource(guest_output_resource, apply_gamma_dest); PushTransitionBarrier(apply_gamma_dest, D3D12_RESOURCE_STATE_COPY_SOURCE, apply_gamma_dest_initial_state); PushTransitionBarrier( guest_output_resource, D3D12_RESOURCE_STATE_COPY_DEST, ui::d3d12::D3D12Presenter::kGuestOutputInternalState); return false; } else { assert_true(apply_gamma_dest_initial_state == D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE); PushTransitionBarrier(apply_gamma_dest, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, apply_gamma_dest_initial_state); PushTransitionBarrier( guest_output_resource, ui::d3d12::D3D12Presenter::kGuestOutputInternalState, D3D12_RESOURCE_STATE_UNORDERED_ACCESS); // From now on, even in case of failure, guest_output_resource must // be transitioned back to kGuestOutputInternalState! 
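          // Illustrative aside: the group counts above are the usual
          // rounded-up division so that partially covered tiles at the right
          // and bottom edges still get a thread group (the gamma ramp shaders
          // are assumed to use a 16x8 thread group and clamp out-of-bounds
          // texels themselves). Compiled-out sketch of the generic form (the
          // function name is hypothetical):
#if 0
#include <cstdint>

          constexpr uint32_t DispatchGroupCountExample(uint32_t size,
                                                       uint32_t group_size) {
            // (size + group_size - 1) / group_size, e.g. (width + 15) / 16
            // for a 16-thread-wide group.
            return (size + group_size - 1) / group_size;
          }

          static_assert(DispatchGroupCountExample(1280, 16) == 80);
          static_assert(DispatchGroupCountExample(1281, 16) == 81);
#endif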
deferred_command_list_.D3DSetComputeRootSignature( fxaa_root_signature_.Get()); FxaaConstants fxaa_constants; fxaa_constants.size[0] = uint32_t(swap_texture_desc.Width); fxaa_constants.size[1] = uint32_t(swap_texture_desc.Height); fxaa_constants.size_inv[0] = 1.0f / float(fxaa_constants.size[0]); fxaa_constants.size_inv[1] = 1.0f / float(fxaa_constants.size[1]); deferred_command_list_.D3DSetComputeRoot32BitConstants( UINT(FxaaRootParameter::kConstants), sizeof(fxaa_constants) / sizeof(uint32_t), &fxaa_constants, 0); D3D12_UNORDERED_ACCESS_VIEW_DESC fxaa_dest_uav_desc; fxaa_dest_uav_desc.Format = ui::d3d12::D3D12Presenter::kGuestOutputFormat; fxaa_dest_uav_desc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE2D; fxaa_dest_uav_desc.Texture2D.MipSlice = 0; fxaa_dest_uav_desc.Texture2D.PlaneSlice = 0; device->CreateUnorderedAccessView(guest_output_resource, nullptr, &fxaa_dest_uav_desc, fxaa_descriptors[0].first); deferred_command_list_.D3DSetComputeRootDescriptorTable( UINT(FxaaRootParameter::kDestination), fxaa_descriptors[0].second); D3D12_SHADER_RESOURCE_VIEW_DESC fxaa_source_srv_desc; fxaa_source_srv_desc.Format = kFxaaSourceTextureFormat; fxaa_source_srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; fxaa_source_srv_desc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; fxaa_source_srv_desc.Texture2D.MostDetailedMip = 0; fxaa_source_srv_desc.Texture2D.MipLevels = 1; fxaa_source_srv_desc.Texture2D.PlaneSlice = 0; fxaa_source_srv_desc.Texture2D.ResourceMinLODClamp = 0.0f; device->CreateShaderResourceView(fxaa_source_texture_.Get(), &fxaa_source_srv_desc, fxaa_descriptors[1].first); deferred_command_list_.D3DSetComputeRootDescriptorTable( UINT(FxaaRootParameter::kSource), fxaa_descriptors[1].second); SetExternalPipeline(swap_post_effect == SwapPostEffect::kFxaaExtreme ? fxaa_extreme_pipeline_.Get() : fxaa_pipeline_.Get()); SubmitBarriers(); deferred_command_list_.D3DDispatch(group_count_x, group_count_y, 1); PushTransitionBarrier( guest_output_resource, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, ui::d3d12::D3D12Presenter::kGuestOutputInternalState); } } else { assert_true(apply_gamma_dest_initial_state == ui::d3d12::D3D12Presenter::kGuestOutputInternalState); PushTransitionBarrier(apply_gamma_dest, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, apply_gamma_dest_initial_state); } // Need to submit all the commands before giving the image back to the // presenter so it can submit its own commands for displaying it to the // queue. SubmitBarriers(); EndSubmission(true); return true; }); // End the frame even if did not present for any reason (the image refresher // was not called), to prevent leaking per-frame resources. 
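  // Illustrative aside: the code above is careful to return apply_gamma_dest
  // and guest_output_resource to their initial states on every path,
  // including failures, because the presenter and later frames assume those
  // states. A compiled-out sketch of one way to make that discipline
  // structural - a hypothetical RAII helper, not what this file actually
  // does (barriers here are batched through PushTransitionBarrier and
  // SubmitBarriers instead of issued one by one):
#if 0
class ScopedResourceStateExample {
 public:
  ScopedResourceStateExample(ID3D12GraphicsCommandList* list,
                             ID3D12Resource* resource,
                             D3D12_RESOURCE_STATES initial,
                             D3D12_RESOURCE_STATES scoped)
      : list_(list), resource_(resource), initial_(initial), scoped_(scoped) {
    Transition(initial_, scoped_);
  }
  // The destructor restores the initial state even on early returns.
  ~ScopedResourceStateExample() { Transition(scoped_, initial_); }

 private:
  void Transition(D3D12_RESOURCE_STATES before, D3D12_RESOURCE_STATES after) {
    D3D12_RESOURCE_BARRIER barrier = {};
    barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
    barrier.Transition.pResource = resource_;
    barrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
    barrier.Transition.StateBefore = before;
    barrier.Transition.StateAfter = after;
    list_->ResourceBarrier(1, &barrier);
  }
  ID3D12GraphicsCommandList* list_;
  ID3D12Resource* resource_;
  D3D12_RESOURCE_STATES initial_;
  D3D12_RESOURCE_STATES scoped_;
};
#endif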
  EndSubmission(true);
}

void D3D12CommandProcessor::OnPrimaryBufferEnd() {
  if (cvars::d3d12_submit_on_primary_buffer_end && submission_open_ &&
      CanEndSubmissionImmediately()) {
    EndSubmission(false);
  }
}

Shader* D3D12CommandProcessor::LoadShader(xenos::ShaderType shader_type,
                                          uint32_t guest_address,
                                          const uint32_t* host_address,
                                          uint32_t dword_count) {
  return pipeline_cache_->LoadShader(shader_type, host_address, dword_count);
}

bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type,
                                      uint32_t index_count,
                                      IndexBufferInfo* index_buffer_info,
                                      bool major_mode_explicit) {
#if XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES
  SCOPE_profile_cpu_f("gpu");
#endif  // XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES

  ID3D12Device* device = GetD3D12Provider().GetDevice();
  const RegisterFile& regs = *register_file_;

  xenos::ModeControl edram_mode = regs.Get<reg::RB_MODECONTROL>().edram_mode;
  if (edram_mode == xenos::ModeControl::kCopy) {
    // Special copy handling.
    return IssueCopy();
  }

  if (regs.Get<reg::RB_SURFACE_INFO>().surface_pitch == 0) {
    // Doesn't actually draw.
    // TODO(Triang3l): Do something so memexport still works in this case
    // maybe? Unlikely that zero would even really be legal though.
    return true;
  }

  // Vertex shader analysis.
  auto vertex_shader = static_cast<D3D12Shader*>(active_vertex_shader());
  if (!vertex_shader) {
    // Always need a vertex shader.
    return false;
  }
  pipeline_cache_->AnalyzeShaderUcode(*vertex_shader);
  bool memexport_used_vertex = vertex_shader->is_valid_memexport_used();

  // Pixel shader analysis.
  bool primitive_polygonal = draw_util::IsPrimitivePolygonal(regs);
  bool is_rasterization_done =
      draw_util::IsRasterizationPotentiallyDone(regs, primitive_polygonal);
  D3D12Shader* pixel_shader = nullptr;
  if (is_rasterization_done) {
    // See xenos::ModeControl for explanation why the pixel shader is only
    // used when it's kColorDepth here.
    if (edram_mode == xenos::ModeControl::kColorDepth) {
      pixel_shader = static_cast<D3D12Shader*>(active_pixel_shader());
      if (pixel_shader) {
        pipeline_cache_->AnalyzeShaderUcode(*pixel_shader);
        if (!draw_util::IsPixelShaderNeededWithRasterization(*pixel_shader,
                                                             regs)) {
          pixel_shader = nullptr;
        }
      }
    }
  } else {
    // Disabling pixel shader for this case is also required by the pipeline
    // cache.
    if (!memexport_used_vertex) {
      // This draw has no effect.
      return true;
    }
  }
  bool memexport_used_pixel =
      pixel_shader && pixel_shader->is_valid_memexport_used();
  bool memexport_used = memexport_used_vertex || memexport_used_pixel;

  if (!BeginSubmission(true)) {
    return false;
  }

  // Process primitives.
  PrimitiveProcessor::ProcessingResult primitive_processing_result;
  if (!primitive_processor_->Process(primitive_processing_result)) {
    return false;
  }
  if (!primitive_processing_result.host_draw_vertex_count) {
    // Nothing to draw.
    return true;
  }

  reg::RB_DEPTHCONTROL normalized_depth_control =
      draw_util::GetNormalizedDepthControl(regs);

  // Shader modifications.
  DxbcShaderTranslator::Modification vertex_shader_modification =
      pipeline_cache_->GetCurrentVertexShaderModification(
          *vertex_shader, primitive_processing_result.host_vertex_shader_type);
  DxbcShaderTranslator::Modification pixel_shader_modification =
      pixel_shader ? pipeline_cache_->GetCurrentPixelShaderModification(
                         *pixel_shader, normalized_depth_control)
                   : DxbcShaderTranslator::Modification(0);

  // Set up the render targets - this may perform dispatches and draws.
  uint32_t normalized_color_mask =
      pixel_shader ?
draw_util::GetNormalizedColorMask( regs, pixel_shader->writes_color_targets()) : 0; if (!render_target_cache_->Update(is_rasterization_done, normalized_depth_control, normalized_color_mask, *vertex_shader)) { return false; } // Create the pipeline (for this, need the actually used render target formats // from the render target cache), translating the shaders - doing this now to // obtain the used textures. D3D12Shader::D3D12Translation* vertex_shader_translation = static_cast( vertex_shader->GetOrCreateTranslation( vertex_shader_modification.value)); D3D12Shader::D3D12Translation* pixel_shader_translation = pixel_shader ? static_cast( pixel_shader->GetOrCreateTranslation( pixel_shader_modification.value)) : nullptr; uint32_t bound_depth_and_color_render_target_bits; uint32_t bound_depth_and_color_render_target_formats [1 + xenos::kMaxColorRenderTargets]; bool host_render_targets_used = render_target_cache_->GetPath() == RenderTargetCache::Path::kHostRenderTargets; if (host_render_targets_used) { bound_depth_and_color_render_target_bits = render_target_cache_->GetLastUpdateBoundRenderTargets( render_target_cache_->gamma_render_target_as_srgb(), bound_depth_and_color_render_target_formats); } else { bound_depth_and_color_render_target_bits = 0; } void* pipeline_handle; ID3D12RootSignature* root_signature; if (!pipeline_cache_->ConfigurePipeline( vertex_shader_translation, pixel_shader_translation, primitive_processing_result, normalized_depth_control, normalized_color_mask, bound_depth_and_color_render_target_bits, bound_depth_and_color_render_target_formats, &pipeline_handle, &root_signature)) { return false; } // Update the textures - this may bind pipelines. uint32_t used_texture_mask = vertex_shader->GetUsedTextureMaskAfterTranslation() | (pixel_shader != nullptr ? pixel_shader->GetUsedTextureMaskAfterTranslation() : 0); texture_cache_->RequestTextures(used_texture_mask); // Bind the pipeline after configuring it and doing everything that may bind // other pipelines. if (current_guest_pipeline_ != pipeline_handle) { deferred_command_list_.SetPipelineStateHandle( reinterpret_cast(pipeline_handle)); current_guest_pipeline_ = pipeline_handle; current_external_pipeline_ = nullptr; } // Get dynamic rasterizer state. uint32_t draw_resolution_scale_x = texture_cache_->draw_resolution_scale_x(); uint32_t draw_resolution_scale_y = texture_cache_->draw_resolution_scale_y(); draw_util::ViewportInfo viewport_info; draw_util::GetHostViewportInfo( regs, draw_resolution_scale_x, draw_resolution_scale_y, true, D3D12_VIEWPORT_BOUNDS_MAX, D3D12_VIEWPORT_BOUNDS_MAX, false, normalized_depth_control, host_render_targets_used && render_target_cache_->depth_float24_convert_in_pixel_shader(), host_render_targets_used, pixel_shader && pixel_shader->writes_depth(), viewport_info); draw_util::Scissor scissor; draw_util::GetScissor(regs, scissor); scissor.offset[0] *= draw_resolution_scale_x; scissor.offset[1] *= draw_resolution_scale_y; scissor.extent[0] *= draw_resolution_scale_x; scissor.extent[1] *= draw_resolution_scale_y; // Update viewport, scissor, blend factor and stencil reference. UpdateFixedFunctionState(viewport_info, scissor, primitive_polygonal, normalized_depth_control); // Update system constants before uploading them. // TODO(Triang3l): With ROV, pass the disabled render target mask for safety. 
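  // Illustrative aside: the scissor scaling just above multiplies both the
  // offset and the extent by the per-axis resolution scale, keeping the
  // rectangle aligned to whole guest pixels. Compiled-out sketch of the
  // conversion into the D3D12_RECT that the fixed-function state update
  // consumes (the function and parameter names are hypothetical):
#if 0
#include <cstdint>

  D3D12_RECT ScaledScissorToRectExample(uint32_t offset_x, uint32_t offset_y,
                                        uint32_t extent_x, uint32_t extent_y,
                                        uint32_t scale_x, uint32_t scale_y) {
    D3D12_RECT rect;
    rect.left = LONG(offset_x * scale_x);
    rect.top = LONG(offset_y * scale_y);
    // Direct3D 12 scissor rectangles are exclusive on the right/bottom, so
    // the far edge is offset + extent, both already scaled.
    rect.right = LONG((offset_x + extent_x) * scale_x);
    rect.bottom = LONG((offset_y + extent_y) * scale_y);
    return rect;
  }
#endif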
UpdateSystemConstantValues( memexport_used, primitive_polygonal, primitive_processing_result.line_loop_closing_index, primitive_processing_result.host_index_endian, viewport_info, used_texture_mask, normalized_depth_control, normalized_color_mask); // Update constant buffers, descriptors and root parameters. if (!UpdateBindings(vertex_shader, pixel_shader, root_signature)) { return false; } // Must not call anything that can change the descriptor heap from now on! // Ensure vertex buffers are resident. // TODO(Triang3l): Cache residency for ranges in a way similar to how texture // validity is tracked. const Shader::ConstantRegisterMap& constant_map_vertex = vertex_shader->constant_register_map(); for (uint32_t i = 0; i < xe::countof(constant_map_vertex.vertex_fetch_bitmap); ++i) { uint32_t vfetch_bits_remaining = constant_map_vertex.vertex_fetch_bitmap[i]; uint32_t j; while (xe::bit_scan_forward(vfetch_bits_remaining, &j)) { vfetch_bits_remaining &= ~(uint32_t(1) << j); uint32_t vfetch_index = i * 32 + j; const auto& vfetch_constant = regs.Get( XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + vfetch_index * 2); switch (vfetch_constant.type) { case xenos::FetchConstantType::kVertex: break; case xenos::FetchConstantType::kInvalidVertex: if (cvars::gpu_allow_invalid_fetch_constants) { break; } XELOGW( "Vertex fetch constant {} ({:08X} {:08X}) has \"invalid\" type! " "This is incorrect behavior, but you can try bypassing this by " "launching Xenia with --gpu_allow_invalid_fetch_constants=true.", vfetch_index, vfetch_constant.dword_0, vfetch_constant.dword_1); return false; default: XELOGW( "Vertex fetch constant {} ({:08X} {:08X}) is completely invalid!", vfetch_index, vfetch_constant.dword_0, vfetch_constant.dword_1); return false; } if (!shared_memory_->RequestRange(vfetch_constant.address << 2, vfetch_constant.size << 2)) { XELOGE( "Failed to request vertex buffer at 0x{:08X} (size {}) in the " "shared memory", vfetch_constant.address << 2, vfetch_constant.size << 2); return false; } } } // Gather memexport ranges and ensure the heaps for them are resident, and // also load the data surrounding the export and to fill the regions that // won't be modified by the shaders. struct MemExportRange { uint32_t base_address_dwords; uint32_t size_dwords; }; MemExportRange memexport_ranges[512]; uint32_t memexport_range_count = 0; if (memexport_used_vertex) { for (uint32_t constant_index : vertex_shader->memexport_stream_constants()) { const auto& memexport_stream = regs.Get( XE_GPU_REG_SHADER_CONSTANT_000_X + constant_index * 4); if (memexport_stream.index_count == 0) { continue; } uint32_t memexport_format_size = GetSupportedMemExportFormatSize(memexport_stream.format); if (memexport_format_size == 0) { XELOGE("Unsupported memexport format {}", FormatInfo::Get( xenos::TextureFormat(uint32_t(memexport_stream.format))) ->name); return false; } uint32_t memexport_size_dwords = memexport_stream.index_count * memexport_format_size; // Try to reduce the number of shared memory operations when writing // different elements into the same buffer through different exports // (happens in 4D5307E6). bool memexport_range_reused = false; for (uint32_t i = 0; i < memexport_range_count; ++i) { MemExportRange& memexport_range = memexport_ranges[i]; if (memexport_range.base_address_dwords == memexport_stream.base_address) { memexport_range.size_dwords = std::max(memexport_range.size_dwords, memexport_size_dwords); memexport_range_reused = true; break; } } // Add a new range if haven't expanded an existing one. 
if (!memexport_range_reused) { MemExportRange& memexport_range = memexport_ranges[memexport_range_count++]; memexport_range.base_address_dwords = memexport_stream.base_address; memexport_range.size_dwords = memexport_size_dwords; } } } if (memexport_used_pixel) { for (uint32_t constant_index : pixel_shader->memexport_stream_constants()) { const auto& memexport_stream = regs.Get( XE_GPU_REG_SHADER_CONSTANT_256_X + constant_index * 4); if (memexport_stream.index_count == 0) { continue; } uint32_t memexport_format_size = GetSupportedMemExportFormatSize(memexport_stream.format); if (memexport_format_size == 0) { XELOGE("Unsupported memexport format {}", FormatInfo::Get( xenos::TextureFormat(uint32_t(memexport_stream.format))) ->name); return false; } uint32_t memexport_size_dwords = memexport_stream.index_count * memexport_format_size; bool memexport_range_reused = false; for (uint32_t i = 0; i < memexport_range_count; ++i) { MemExportRange& memexport_range = memexport_ranges[i]; if (memexport_range.base_address_dwords == memexport_stream.base_address) { memexport_range.size_dwords = std::max(memexport_range.size_dwords, memexport_size_dwords); memexport_range_reused = true; break; } } if (!memexport_range_reused) { MemExportRange& memexport_range = memexport_ranges[memexport_range_count++]; memexport_range.base_address_dwords = memexport_stream.base_address; memexport_range.size_dwords = memexport_size_dwords; } } } for (uint32_t i = 0; i < memexport_range_count; ++i) { const MemExportRange& memexport_range = memexport_ranges[i]; if (!shared_memory_->RequestRange(memexport_range.base_address_dwords << 2, memexport_range.size_dwords << 2)) { XELOGE( "Failed to request memexport stream at 0x{:08X} (size {}) in the " "shared memory", memexport_range.base_address_dwords << 2, memexport_range.size_dwords << 2); return false; } } // Primitive topology. D3D_PRIMITIVE_TOPOLOGY primitive_topology; if (primitive_processing_result.IsTessellated()) { switch (primitive_processing_result.host_primitive_type) { // TODO(Triang3l): Support all primitive types. 
case xenos::PrimitiveType::kTriangleList: case xenos::PrimitiveType::kTrianglePatch: primitive_topology = D3D_PRIMITIVE_TOPOLOGY_3_CONTROL_POINT_PATCHLIST; break; case xenos::PrimitiveType::kQuadList: case xenos::PrimitiveType::kQuadPatch: primitive_topology = D3D_PRIMITIVE_TOPOLOGY_4_CONTROL_POINT_PATCHLIST; break; default: XELOGE( "Host tessellated primitive type {} returned by the primitive " "processor is not supported by the Direct3D 12 command processor", uint32_t(primitive_processing_result.host_primitive_type)); assert_unhandled_case(primitive_processing_result.host_primitive_type); return false; } } else { switch (primitive_processing_result.host_primitive_type) { case xenos::PrimitiveType::kPointList: primitive_topology = D3D_PRIMITIVE_TOPOLOGY_POINTLIST; break; case xenos::PrimitiveType::kLineList: primitive_topology = D3D_PRIMITIVE_TOPOLOGY_LINELIST; break; case xenos::PrimitiveType::kLineStrip: primitive_topology = D3D_PRIMITIVE_TOPOLOGY_LINESTRIP; break; case xenos::PrimitiveType::kTriangleList: case xenos::PrimitiveType::kRectangleList: primitive_topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST; break; case xenos::PrimitiveType::kTriangleStrip: primitive_topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP; break; case xenos::PrimitiveType::kQuadList: primitive_topology = D3D_PRIMITIVE_TOPOLOGY_LINELIST_ADJ; break; default: XELOGE( "Host primitive type {} returned by the primitive processor is not " "supported by the Direct3D 12 command processor", uint32_t(primitive_processing_result.host_primitive_type)); assert_unhandled_case(primitive_processing_result.host_primitive_type); return false; } } SetPrimitiveTopology(primitive_topology); // Must not call anything that may change the primitive topology from now on! // Draw. if (primitive_processing_result.index_buffer_type == PrimitiveProcessor::ProcessedIndexBufferType::kNone) { if (memexport_used) { shared_memory_->UseForWriting(); } else { shared_memory_->UseForReading(); } SubmitBarriers(); deferred_command_list_.D3DDrawInstanced( primitive_processing_result.host_draw_vertex_count, 1, 0, 0); } else { D3D12_INDEX_BUFFER_VIEW index_buffer_view; index_buffer_view.SizeInBytes = primitive_processing_result.host_draw_vertex_count; if (primitive_processing_result.host_index_format == xenos::IndexFormat::kInt16) { index_buffer_view.SizeInBytes *= sizeof(uint16_t); index_buffer_view.Format = DXGI_FORMAT_R16_UINT; } else { index_buffer_view.SizeInBytes *= sizeof(uint32_t); index_buffer_view.Format = DXGI_FORMAT_R32_UINT; } ID3D12Resource* scratch_index_buffer = nullptr; switch (primitive_processing_result.index_buffer_type) { case PrimitiveProcessor::ProcessedIndexBufferType::kGuest: { if (memexport_used) { // If the shared memory is a UAV, it can't be used as an index buffer // (UAV is a read/write state, index buffer is a read-only state). // Need to copy the indices to a buffer in the index buffer state. 
scratch_index_buffer = RequestScratchGPUBuffer( index_buffer_view.SizeInBytes, D3D12_RESOURCE_STATE_COPY_DEST); if (scratch_index_buffer == nullptr) { return false; } shared_memory_->UseAsCopySource(); SubmitBarriers(); deferred_command_list_.D3DCopyBufferRegion( scratch_index_buffer, 0, shared_memory_->GetBuffer(), primitive_processing_result.guest_index_base, index_buffer_view.SizeInBytes); PushTransitionBarrier(scratch_index_buffer, D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_INDEX_BUFFER); index_buffer_view.BufferLocation = scratch_index_buffer->GetGPUVirtualAddress(); } else { index_buffer_view.BufferLocation = shared_memory_->GetGPUAddress() + primitive_processing_result.guest_index_base; } } break; case PrimitiveProcessor::ProcessedIndexBufferType::kHostConverted: index_buffer_view.BufferLocation = primitive_processor_->GetConvertedIndexBufferGpuAddress( primitive_processing_result.host_index_buffer_handle); break; case PrimitiveProcessor::ProcessedIndexBufferType::kHostBuiltin: index_buffer_view.BufferLocation = primitive_processor_->GetBuiltinIndexBufferGpuAddress( primitive_processing_result.host_index_buffer_handle); break; default: assert_unhandled_case(primitive_processing_result.index_buffer_type); return false; } deferred_command_list_.D3DIASetIndexBuffer(&index_buffer_view); if (memexport_used) { shared_memory_->UseForWriting(); } else { shared_memory_->UseForReading(); } SubmitBarriers(); deferred_command_list_.D3DDrawIndexedInstanced( primitive_processing_result.host_draw_vertex_count, 1, 0, 0, 0); if (scratch_index_buffer != nullptr) { ReleaseScratchGPUBuffer(scratch_index_buffer, D3D12_RESOURCE_STATE_INDEX_BUFFER); } } if (memexport_used) { // Make sure this memexporting draw is ordered with other work using shared // memory as a UAV. // TODO(Triang3l): Find some PM4 command that can be used for indication of // when memexports should be awaited? shared_memory_->MarkUAVWritesCommitNeeded(); // Invalidate textures in memexported memory and watch for changes. for (uint32_t i = 0; i < memexport_range_count; ++i) { const MemExportRange& memexport_range = memexport_ranges[i]; shared_memory_->RangeWrittenByGpu( memexport_range.base_address_dwords << 2, memexport_range.size_dwords << 2, false); } if (cvars::d3d12_readback_memexport) { // Read the exported data on the CPU. 
uint32_t memexport_total_size = 0; for (uint32_t i = 0; i < memexport_range_count; ++i) { memexport_total_size += memexport_ranges[i].size_dwords << 2; } if (memexport_total_size != 0) { ID3D12Resource* readback_buffer = RequestReadbackBuffer(memexport_total_size); if (readback_buffer != nullptr) { shared_memory_->UseAsCopySource(); SubmitBarriers(); ID3D12Resource* shared_memory_buffer = shared_memory_->GetBuffer(); uint32_t readback_buffer_offset = 0; for (uint32_t i = 0; i < memexport_range_count; ++i) { const MemExportRange& memexport_range = memexport_ranges[i]; uint32_t memexport_range_size = memexport_range.size_dwords << 2; deferred_command_list_.D3DCopyBufferRegion( readback_buffer, readback_buffer_offset, shared_memory_buffer, memexport_range.base_address_dwords << 2, memexport_range_size); readback_buffer_offset += memexport_range_size; } if (AwaitAllQueueOperationsCompletion()) { D3D12_RANGE readback_range; readback_range.Begin = 0; readback_range.End = memexport_total_size; void* readback_mapping; if (SUCCEEDED(readback_buffer->Map(0, &readback_range, &readback_mapping))) { const uint32_t* readback_dwords = reinterpret_cast(readback_mapping); for (uint32_t i = 0; i < memexport_range_count; ++i) { const MemExportRange& memexport_range = memexport_ranges[i]; std::memcpy(memory_->TranslatePhysical( memexport_range.base_address_dwords << 2), readback_dwords, memexport_range.size_dwords << 2); readback_dwords += memexport_range.size_dwords; } D3D12_RANGE readback_write_range = {}; readback_buffer->Unmap(0, &readback_write_range); } } } } } } return true; } void D3D12CommandProcessor::InitializeTrace() { CommandProcessor::InitializeTrace(); if (!BeginSubmission(false)) { return; } bool render_target_cache_submitted = render_target_cache_->InitializeTraceSubmitDownloads(); bool shared_memory_submitted = shared_memory_->InitializeTraceSubmitDownloads(); if (!render_target_cache_submitted && !shared_memory_submitted) { return; } AwaitAllQueueOperationsCompletion(); if (render_target_cache_submitted) { render_target_cache_->InitializeTraceCompleteDownloads(); } if (shared_memory_submitted) { shared_memory_->InitializeTraceCompleteDownloads(); } } bool D3D12CommandProcessor::IssueCopy() { #if XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES SCOPE_profile_cpu_f("gpu"); #endif // XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES if (!BeginSubmission(true)) { return false; } uint32_t written_address, written_length; if (!render_target_cache_->Resolve(*memory_, *shared_memory_, *texture_cache_, written_address, written_length)) { return false; } if (cvars::d3d12_readback_resolve && !texture_cache_->IsDrawResolutionScaled() && written_length) { // Read the resolved data on the CPU. 
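    // Illustrative aside: the readbacks in this file rely on D3D12_RANGE
    // semantics - the range passed to Map() declares what the CPU will read,
    // and the empty range passed to Unmap() declares that the CPU wrote
    // nothing, which matters on cache-incoherent architectures. Compiled-out
    // sketch of the pattern in isolation (the names are hypothetical):
#if 0
#include <cstring>

    bool ReadbackExample(ID3D12Resource* readback_buffer, void* dest,
                         SIZE_T size) {
      // Declare the read range explicitly instead of passing nullptr (which
      // would mean the whole resource may be read).
      D3D12_RANGE read_range = {0, size};
      void* mapping;
      if (FAILED(readback_buffer->Map(0, &read_range, &mapping))) {
        return false;
      }
      std::memcpy(dest, mapping, size);
      // Empty written range: the CPU did not modify the mapped memory.
      D3D12_RANGE written_range = {};
      readback_buffer->Unmap(0, &written_range);
      return true;
    }
#endif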
    ID3D12Resource* readback_buffer = RequestReadbackBuffer(written_length);
    if (readback_buffer != nullptr) {
      shared_memory_->UseAsCopySource();
      SubmitBarriers();
      ID3D12Resource* shared_memory_buffer = shared_memory_->GetBuffer();
      deferred_command_list_.D3DCopyBufferRegion(
          readback_buffer, 0, shared_memory_buffer, written_address,
          written_length);
      if (AwaitAllQueueOperationsCompletion()) {
        D3D12_RANGE readback_range;
        readback_range.Begin = 0;
        readback_range.End = written_length;
        void* readback_mapping;
        if (SUCCEEDED(
                readback_buffer->Map(0, &readback_range, &readback_mapping))) {
          std::memcpy(memory_->TranslatePhysical(written_address),
                      readback_mapping, written_length);
          D3D12_RANGE readback_write_range = {};
          readback_buffer->Unmap(0, &readback_write_range);
        }
      }
    }
  }
  return true;
}

void D3D12CommandProcessor::CheckSubmissionFence(uint64_t await_submission) {
  if (await_submission >= submission_current_) {
    if (submission_open_) {
      EndSubmission(false);
    }
    // Ending an open submission should cause queue operations done directly
    // (like UpdateTileMappings) to be tracked within the scope of that
    // submission, but just in case of a failure, or queue operations being
    // done outside of a submission, await explicitly.
    if (queue_operations_done_since_submission_signal_) {
      UINT64 fence_value = ++queue_operations_since_submission_fence_last_;
      ID3D12CommandQueue* direct_queue = GetD3D12Provider().GetDirectQueue();
      if (SUCCEEDED(direct_queue->Signal(
              queue_operations_since_submission_fence_, fence_value)) &&
          SUCCEEDED(queue_operations_since_submission_fence_
                        ->SetEventOnCompletion(fence_value,
                                               fence_completion_event_))) {
        WaitForSingleObject(fence_completion_event_, INFINITE);
        queue_operations_done_since_submission_signal_ = false;
      } else {
        XELOGE(
            "Failed to await an out-of-submission queue operation completion "
            "Direct3D 12 fence");
      }
    }
    // A submission won't be ended if it hasn't been started, or if ending
    // has failed - clamp the index.
    await_submission = submission_current_ - 1;
  }

  uint64_t submission_completed_before = submission_completed_;
  submission_completed_ = submission_fence_->GetCompletedValue();
  if (submission_completed_ < await_submission) {
    if (SUCCEEDED(submission_fence_->SetEventOnCompletion(
            await_submission, fence_completion_event_))) {
      WaitForSingleObject(fence_completion_event_, INFINITE);
      submission_completed_ = submission_fence_->GetCompletedValue();
    }
  }
  if (submission_completed_ < await_submission) {
    XELOGE("Failed to await a submission completion Direct3D 12 fence");
  }
  if (submission_completed_ <= submission_completed_before) {
    // Not updated - no need to reclaim or download things.
    return;
  }

  // Reclaim command allocators.
  while (command_allocator_submitted_first_) {
    if (command_allocator_submitted_first_->last_usage_submission >
        submission_completed_) {
      break;
    }
    if (command_allocator_writable_last_) {
      command_allocator_writable_last_->next =
          command_allocator_submitted_first_;
    } else {
      command_allocator_writable_first_ = command_allocator_submitted_first_;
    }
    command_allocator_writable_last_ = command_allocator_submitted_first_;
    command_allocator_submitted_first_ =
        command_allocator_submitted_first_->next;
    command_allocator_writable_last_->next = nullptr;
  }
  if (!command_allocator_submitted_first_) {
    command_allocator_submitted_last_ = nullptr;
  }

  // Release single-use bindless descriptors.
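  // Illustrative aside: CheckSubmissionFence above uses the standard
  // Direct3D 12 CPU-side wait - Signal on the queue, SetEventOnCompletion on
  // the fence, then a blocking wait on the event. Compiled-out sketch of
  // that pattern in isolation (the names are hypothetical):
#if 0
  bool AwaitFenceValueExample(ID3D12CommandQueue* queue, ID3D12Fence* fence,
                              UINT64 value, HANDLE completion_event) {
    if (FAILED(queue->Signal(fence, value))) {
      return false;
    }
    if (fence->GetCompletedValue() >= value) {
      // Already passed - no need to wait on the event.
      return true;
    }
    if (FAILED(fence->SetEventOnCompletion(value, completion_event))) {
      return false;
    }
    return WaitForSingleObject(completion_event, INFINITE) == WAIT_OBJECT_0;
  }
#endif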
while (!view_bindless_one_use_descriptors_.empty()) { if (view_bindless_one_use_descriptors_.front().second > submission_completed_) { break; } ReleaseViewBindlessDescriptorImmediately( view_bindless_one_use_descriptors_.front().first); view_bindless_one_use_descriptors_.pop_front(); } // Delete transient resources marked for deletion. while (!resources_for_deletion_.empty()) { if (resources_for_deletion_.front().first > submission_completed_) { break; } resources_for_deletion_.front().second->Release(); resources_for_deletion_.pop_front(); } shared_memory_->CompletedSubmissionUpdated(); render_target_cache_->CompletedSubmissionUpdated(); primitive_processor_->CompletedSubmissionUpdated(); texture_cache_->CompletedSubmissionUpdated(submission_completed_); } bool D3D12CommandProcessor::BeginSubmission(bool is_guest_command) { #if XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES SCOPE_profile_cpu_f("gpu"); #endif // XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES if (device_removed_) { return false; } bool is_opening_frame = is_guest_command && !frame_open_; if (submission_open_ && !is_opening_frame) { return true; } // Check if the device is still available. ID3D12Device* device = GetD3D12Provider().GetDevice(); HRESULT device_removed_reason = device->GetDeviceRemovedReason(); if (FAILED(device_removed_reason)) { device_removed_ = true; graphics_system_->OnHostGpuLossFromAnyThread(device_removed_reason != DXGI_ERROR_DEVICE_REMOVED); return false; } // Check the fence - needed for all kinds of submissions (to reclaim transient // resources early) and specifically for frames (not to queue too many), and // await the availability of the current frame. CheckSubmissionFence( is_opening_frame ? closed_frame_submissions_[frame_current_ % kQueueFrames] : 0); // TODO(Triang3l): If failed to await (completed submission < awaited frame // submission), do something like dropping the draw command that wanted to // open the frame. if (is_opening_frame) { // Update the completed frame index, also obtaining the actual completed // frame number (since the CPU may be actually less than 3 frames behind) // before reclaiming resources tracked with the frame number. frame_completed_ = std::max(frame_current_, uint64_t(kQueueFrames)) - kQueueFrames; for (uint64_t frame = frame_completed_ + 1; frame < frame_current_; ++frame) { if (closed_frame_submissions_[frame % kQueueFrames] > submission_completed_) { break; } frame_completed_ = frame; } } if (!submission_open_) { submission_open_ = true; // Start a new deferred command list - will submit it to the real one in the // end of the submission (when async pipeline creation requests are // fulfilled). deferred_command_list_.Reset(); // Reset cached state of the command list. ff_viewport_update_needed_ = true; ff_scissor_update_needed_ = true; ff_blend_factor_update_needed_ = true; ff_stencil_ref_update_needed_ = true; current_guest_pipeline_ = nullptr; current_external_pipeline_ = nullptr; current_graphics_root_signature_ = nullptr; current_graphics_root_up_to_date_ = 0; if (bindless_resources_used_) { deferred_command_list_.SetDescriptorHeaps(view_bindless_heap_, sampler_bindless_heap_current_); } else { view_bindful_heap_current_ = nullptr; sampler_bindful_heap_current_ = nullptr; } primitive_topology_ = D3D_PRIMITIVE_TOPOLOGY_UNDEFINED; render_target_cache_->BeginSubmission(); primitive_processor_->BeginSubmission(); texture_cache_->BeginSubmission(submission_current_); } if (is_opening_frame) { frame_open_ = true; // Reset bindings that depend on the data stored in the pools. 
std::memset(current_float_constant_map_vertex_, 0, sizeof(current_float_constant_map_vertex_)); std::memset(current_float_constant_map_pixel_, 0, sizeof(current_float_constant_map_pixel_)); cbuffer_binding_system_.up_to_date = false; cbuffer_binding_float_vertex_.up_to_date = false; cbuffer_binding_float_pixel_.up_to_date = false; cbuffer_binding_bool_loop_.up_to_date = false; cbuffer_binding_fetch_.up_to_date = false; if (bindless_resources_used_) { cbuffer_binding_descriptor_indices_vertex_.up_to_date = false; cbuffer_binding_descriptor_indices_pixel_.up_to_date = false; } else { draw_view_bindful_heap_index_ = ui::d3d12::D3D12DescriptorHeapPool::kHeapIndexInvalid; draw_sampler_bindful_heap_index_ = ui::d3d12::D3D12DescriptorHeapPool::kHeapIndexInvalid; bindful_textures_written_vertex_ = false; bindful_textures_written_pixel_ = false; bindful_samplers_written_vertex_ = false; bindful_samplers_written_pixel_ = false; } // Reclaim pool pages - no need to do this every small submission since some // may be reused. constant_buffer_pool_->Reclaim(frame_completed_); if (!bindless_resources_used_) { view_bindful_heap_pool_->Reclaim(frame_completed_); sampler_bindful_heap_pool_->Reclaim(frame_completed_); } pix_capturing_ = pix_capture_requested_.exchange(false, std::memory_order_relaxed); if (pix_capturing_) { IDXGraphicsAnalysis* graphics_analysis = GetD3D12Provider().GetGraphicsAnalysis(); if (graphics_analysis != nullptr) { graphics_analysis->BeginCapture(); } } primitive_processor_->BeginFrame(); texture_cache_->BeginFrame(); } return true; } bool D3D12CommandProcessor::EndSubmission(bool is_swap) { const ui::d3d12::D3D12Provider& provider = GetD3D12Provider(); // Make sure there is a command allocator to write commands to. if (submission_open_ && !command_allocator_writable_first_) { ID3D12CommandAllocator* command_allocator; if (FAILED(provider.GetDevice()->CreateCommandAllocator( D3D12_COMMAND_LIST_TYPE_DIRECT, IID_PPV_ARGS(&command_allocator)))) { XELOGE("Failed to create a command allocator"); // Try to submit later. Completely dropping the submission is not // permitted because resources would be left in an undefined state. return false; } command_allocator_writable_first_ = new CommandAllocator; command_allocator_writable_first_->command_allocator = command_allocator; command_allocator_writable_first_->last_usage_submission = 0; command_allocator_writable_first_->next = nullptr; command_allocator_writable_last_ = command_allocator_writable_first_; } bool is_closing_frame = is_swap && frame_open_; if (is_closing_frame) { texture_cache_->EndFrame(); primitive_processor_->EndFrame(); } if (submission_open_) { assert_false(scratch_buffer_used_); pipeline_cache_->EndSubmission(); // Submit barriers now because resources with the queued barriers may be // destroyed between frames. SubmitBarriers(); ID3D12CommandQueue* direct_queue = provider.GetDirectQueue(); // Submit the deferred command list. // Only one deferred command list must be executed in the same // ExecuteCommandLists - the boundaries of ExecuteCommandLists are a full // UAV and aliasing barrier, and subsystems of the emulator assume it // happens between Xenia submissions. 
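  // Illustrative aside: the submission code around here follows the
  // canonical Direct3D 12 round trip - reset the allocator once the GPU is
  // done with its previous commands, reset the command list against it,
  // record, close, execute, then signal the per-submission fence.
  // Compiled-out sketch (the names are hypothetical):
#if 0
  void SubmitExample(ID3D12CommandQueue* queue,
                     ID3D12CommandAllocator* allocator,
                     ID3D12GraphicsCommandList* list, ID3D12Fence* fence,
                     UINT64 submission_index) {
    // Safe only when the GPU has completed the allocator's previous commands.
    allocator->Reset();
    list->Reset(allocator, nullptr);
    // ... record commands ...
    list->Close();
    ID3D12CommandList* lists[] = {list};
    queue->ExecuteCommandLists(1, lists);
    // The fence reaching submission_index later tells the CPU that the
    // allocator can be reused.
    queue->Signal(fence, submission_index);
  }
#endif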
ID3D12CommandAllocator* command_allocator = command_allocator_writable_first_->command_allocator; command_allocator->Reset(); command_list_->Reset(command_allocator, nullptr); deferred_command_list_.Execute(command_list_, command_list_1_); command_list_->Close(); ID3D12CommandList* execute_command_lists[] = {command_list_}; direct_queue->ExecuteCommandLists(1, execute_command_lists); command_allocator_writable_first_->last_usage_submission = submission_current_; if (command_allocator_submitted_last_) { command_allocator_submitted_last_->next = command_allocator_writable_first_; } else { command_allocator_submitted_first_ = command_allocator_writable_first_; } command_allocator_submitted_last_ = command_allocator_writable_first_; command_allocator_writable_first_ = command_allocator_writable_first_->next; command_allocator_submitted_last_->next = nullptr; if (!command_allocator_writable_first_) { command_allocator_writable_last_ = nullptr; } direct_queue->Signal(submission_fence_, submission_current_++); submission_open_ = false; // Queue operations done directly (like UpdateTileMappings) will be awaited // alongside the last submission if needed. queue_operations_done_since_submission_signal_ = false; } if (is_closing_frame) { // Close the capture after submitting. if (pix_capturing_) { IDXGraphicsAnalysis* graphics_analysis = provider.GetGraphicsAnalysis(); if (graphics_analysis != nullptr) { graphics_analysis->EndCapture(); } pix_capturing_ = false; } frame_open_ = false; // Submission already closed now, so minus 1. closed_frame_submissions_[(frame_current_++) % kQueueFrames] = submission_current_ - 1; if (cache_clear_requested_ && AwaitAllQueueOperationsCompletion()) { cache_clear_requested_ = false; ClearCommandAllocatorCache(); ui::d3d12::util::ReleaseAndNull(scratch_buffer_); scratch_buffer_size_ = 0; if (bindless_resources_used_) { texture_cache_bindless_sampler_map_.clear(); for (const auto& sampler_bindless_heap_overflowed : sampler_bindless_heaps_overflowed_) { sampler_bindless_heap_overflowed.first->Release(); } sampler_bindless_heaps_overflowed_.clear(); sampler_bindless_heap_allocated_ = 0; } else { sampler_bindful_heap_pool_->ClearCache(); view_bindful_heap_pool_->ClearCache(); } constant_buffer_pool_->ClearCache(); texture_cache_->ClearCache(); // Not clearing the root signatures as they're referenced by pipelines, // which are not destroyed. 
primitive_processor_->ClearCache(); render_target_cache_->ClearCache(); shared_memory_->ClearCache(); } } return true; } bool D3D12CommandProcessor::CanEndSubmissionImmediately() const { return !submission_open_ || !pipeline_cache_->IsCreatingPipelines(); } void D3D12CommandProcessor::ClearCommandAllocatorCache() { while (command_allocator_submitted_first_) { auto next = command_allocator_submitted_first_->next; command_allocator_submitted_first_->command_allocator->Release(); delete command_allocator_submitted_first_; command_allocator_submitted_first_ = next; } command_allocator_submitted_last_ = nullptr; while (command_allocator_writable_first_) { auto next = command_allocator_writable_first_->next; command_allocator_writable_first_->command_allocator->Release(); delete command_allocator_writable_first_; command_allocator_writable_first_ = next; } command_allocator_writable_last_ = nullptr; } void D3D12CommandProcessor::UpdateFixedFunctionState( const draw_util::ViewportInfo& viewport_info, const draw_util::Scissor& scissor, bool primitive_polygonal, reg::RB_DEPTHCONTROL normalized_depth_control) { #if XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES SCOPE_profile_cpu_f("gpu"); #endif // XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES // Viewport. D3D12_VIEWPORT viewport; viewport.TopLeftX = float(viewport_info.xy_offset[0]); viewport.TopLeftY = float(viewport_info.xy_offset[1]); viewport.Width = float(viewport_info.xy_extent[0]); viewport.Height = float(viewport_info.xy_extent[1]); viewport.MinDepth = viewport_info.z_min; viewport.MaxDepth = viewport_info.z_max; SetViewport(viewport); // Scissor. D3D12_RECT scissor_rect; scissor_rect.left = LONG(scissor.offset[0]); scissor_rect.top = LONG(scissor.offset[1]); scissor_rect.right = LONG(scissor.offset[0] + scissor.extent[0]); scissor_rect.bottom = LONG(scissor.offset[1] + scissor.extent[1]); SetScissorRect(scissor_rect); if (render_target_cache_->GetPath() == RenderTargetCache::Path::kHostRenderTargets) { const RegisterFile& regs = *register_file_; // Blend factor. float blend_factor[] = { regs[XE_GPU_REG_RB_BLEND_RED].f32, regs[XE_GPU_REG_RB_BLEND_GREEN].f32, regs[XE_GPU_REG_RB_BLEND_BLUE].f32, regs[XE_GPU_REG_RB_BLEND_ALPHA].f32, }; // std::memcmp instead of != so in case of NaN, every draw won't be // invalidating it. ff_blend_factor_update_needed_ |= std::memcmp(ff_blend_factor_, blend_factor, sizeof(float) * 4) != 0; if (ff_blend_factor_update_needed_) { std::memcpy(ff_blend_factor_, blend_factor, sizeof(float) * 4); deferred_command_list_.D3DOMSetBlendFactor(ff_blend_factor_); ff_blend_factor_update_needed_ = false; } // Stencil reference value. Per-face reference not supported by Direct3D 12, // choose the back face one only if drawing only back faces. 
    Register stencil_ref_mask_reg;
    auto pa_su_sc_mode_cntl = regs.Get<reg::PA_SU_SC_MODE_CNTL>();
    if (primitive_polygonal && normalized_depth_control.backface_enable &&
        pa_su_sc_mode_cntl.cull_front && !pa_su_sc_mode_cntl.cull_back) {
      stencil_ref_mask_reg = XE_GPU_REG_RB_STENCILREFMASK_BF;
    } else {
      stencil_ref_mask_reg = XE_GPU_REG_RB_STENCILREFMASK;
    }
    uint32_t stencil_ref =
        regs.Get<reg::RB_STENCILREFMASK>(stencil_ref_mask_reg).stencilref;
    ff_stencil_ref_update_needed_ |= ff_stencil_ref_ != stencil_ref;
    if (ff_stencil_ref_update_needed_) {
      ff_stencil_ref_ = stencil_ref;
      deferred_command_list_.D3DOMSetStencilRef(ff_stencil_ref_);
      ff_stencil_ref_update_needed_ = false;
    }
  }
}

void D3D12CommandProcessor::UpdateSystemConstantValues(
    bool shared_memory_is_uav, bool primitive_polygonal,
    uint32_t line_loop_closing_index, xenos::Endian index_endian,
    const draw_util::ViewportInfo& viewport_info, uint32_t used_texture_mask,
    reg::RB_DEPTHCONTROL normalized_depth_control,
    uint32_t normalized_color_mask) {
#if XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES
  SCOPE_profile_cpu_f("gpu");
#endif  // XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES

  const RegisterFile& regs = *register_file_;
  auto pa_cl_clip_cntl = regs.Get<reg::PA_CL_CLIP_CNTL>();
  auto pa_cl_vte_cntl = regs.Get<reg::PA_CL_VTE_CNTL>();
  auto pa_su_point_minmax = regs.Get<reg::PA_SU_POINT_MINMAX>();
  auto pa_su_point_size = regs.Get<reg::PA_SU_POINT_SIZE>();
  auto pa_su_sc_mode_cntl = regs.Get<reg::PA_SU_SC_MODE_CNTL>();
  float rb_alpha_ref = regs[XE_GPU_REG_RB_ALPHA_REF].f32;
  auto rb_colorcontrol = regs.Get<reg::RB_COLORCONTROL>();
  auto rb_depth_info = regs.Get<reg::RB_DEPTH_INFO>();
  auto rb_stencilrefmask = regs.Get<reg::RB_STENCILREFMASK>();
  auto rb_stencilrefmask_bf =
      regs.Get<reg::RB_STENCILREFMASK>(XE_GPU_REG_RB_STENCILREFMASK_BF);
  auto rb_surface_info = regs.Get<reg::RB_SURFACE_INFO>();
  auto sq_context_misc = regs.Get<reg::SQ_CONTEXT_MISC>();
  auto sq_program_cntl = regs.Get<reg::SQ_PROGRAM_CNTL>();
  auto vgt_draw_initiator = regs.Get<reg::VGT_DRAW_INITIATOR>();
  uint32_t vgt_indx_offset = regs.Get<reg::VGT_INDX_OFFSET>().indx_offset;
  uint32_t vgt_max_vtx_indx = regs.Get<reg::VGT_MAX_VTX_INDX>().max_indx;
  uint32_t vgt_min_vtx_indx = regs.Get<reg::VGT_MIN_VTX_INDX>().min_indx;

  bool edram_rov_used = render_target_cache_->GetPath() ==
                        RenderTargetCache::Path::kPixelShaderInterlock;
  uint32_t draw_resolution_scale_x = texture_cache_->draw_resolution_scale_x();
  uint32_t draw_resolution_scale_y = texture_cache_->draw_resolution_scale_y();

  // Get the color info register values for each render target. Also, for
  // ROV, exclude components that don't exist in the format from the write
  // mask. Don't exclude fully overlapping render targets, however - two
  // render targets with the same base address are used in the lighting pass
  // of 4D5307E6, for example, with the needed one picked with dynamic control
  // flow.
  reg::RB_COLOR_INFO color_infos[4];
  float rt_clamp[4][4];
  uint32_t rt_keep_masks[4][2];
  for (uint32_t i = 0; i < 4; ++i) {
    auto color_info = regs.Get<reg::RB_COLOR_INFO>(
        reg::RB_COLOR_INFO::rt_register_indices[i]);
    color_infos[i] = color_info;
    if (edram_rov_used) {
      // Get the mask for keeping previous color's components unmodified, or
      // two UINT32_MAX if no colors actually existing in the RT are written.
      DxbcShaderTranslator::ROV_GetColorFormatSystemConstants(
          color_info.color_format, (normalized_color_mask >> (i * 4)) & 0b1111,
          rt_clamp[i][0], rt_clamp[i][1], rt_clamp[i][2], rt_clamp[i][3],
          rt_keep_masks[i][0], rt_keep_masks[i][1]);
    }
  }

  // Disable depth and stencil if it aliases a color render target (for
  // instance, during the XBLA logo in 58410954, though depth writing is
  // already disabled there).
bool depth_stencil_enabled = normalized_depth_control.stencil_enable || normalized_depth_control.z_enable; if (edram_rov_used && depth_stencil_enabled) { for (uint32_t i = 0; i < 4; ++i) { if (rb_depth_info.depth_base == color_infos[i].color_base && (rt_keep_masks[i][0] != UINT32_MAX || rt_keep_masks[i][1] != UINT32_MAX)) { depth_stencil_enabled = false; break; } } } bool dirty = false; // Flags. uint32_t flags = 0; // Whether shared memory is an SRV or a UAV. Because a resource can't be in a // read-write (UAV) and a read-only (SRV, IBV) state at once, if any shader in // the pipeline uses memexport, the shared memory buffer must be a UAV. if (shared_memory_is_uav) { flags |= DxbcShaderTranslator::kSysFlag_SharedMemoryIsUAV; } // W0 division control. // http://www.x.org/docs/AMD/old/evergreen_3D_registers_v2.pdf // 8: VTX_XY_FMT = true: the incoming XY have already been multiplied by 1/W0. // = false: multiply the X, Y coordinates by 1/W0. // 9: VTX_Z_FMT = true: the incoming Z has already been multiplied by 1/W0. // = false: multiply the Z coordinate by 1/W0. // 10: VTX_W0_FMT = true: the incoming W0 is not 1/W0. Perform the reciprocal // to get 1/W0. if (pa_cl_vte_cntl.vtx_xy_fmt) { flags |= DxbcShaderTranslator::kSysFlag_XYDividedByW; } if (pa_cl_vte_cntl.vtx_z_fmt) { flags |= DxbcShaderTranslator::kSysFlag_ZDividedByW; } if (pa_cl_vte_cntl.vtx_w0_fmt) { flags |= DxbcShaderTranslator::kSysFlag_WNotReciprocal; } // User clip planes (UCP_ENA_#), when not CLIP_DISABLE. if (!pa_cl_clip_cntl.clip_disable) { flags |= (pa_cl_clip_cntl.value & 0b111111) << DxbcShaderTranslator::kSysFlag_UserClipPlane0_Shift; } // Whether the primitive is polygonal and SV_IsFrontFace matters. if (primitive_polygonal) { flags |= DxbcShaderTranslator::kSysFlag_PrimitivePolygonal; } // Primitive type. if (vgt_draw_initiator.prim_type == xenos::PrimitiveType::kPointList) { flags |= DxbcShaderTranslator::kSysFlag_PrimitivePoint; } else if (draw_util::IsPrimitiveLine(regs)) { flags |= DxbcShaderTranslator::kSysFlag_PrimitiveLine; } // Primitive killing condition. if (pa_cl_clip_cntl.vtx_kill_or) { flags |= DxbcShaderTranslator::kSysFlag_KillIfAnyVertexKilled; } // Depth format. if (rb_depth_info.depth_format == xenos::DepthRenderTargetFormat::kD24FS8) { flags |= DxbcShaderTranslator::kSysFlag_DepthFloat24; } // Alpha test. xenos::CompareFunction alpha_test_function = rb_colorcontrol.alpha_test_enable ? rb_colorcontrol.alpha_func : xenos::CompareFunction::kAlways; flags |= uint32_t(alpha_test_function) << DxbcShaderTranslator::kSysFlag_AlphaPassIfLess_Shift; // Gamma writing. if (!render_target_cache_->gamma_render_target_as_srgb()) { for (uint32_t i = 0; i < 4; ++i) { if (color_infos[i].color_format == xenos::ColorRenderTargetFormat::k_8_8_8_8_GAMMA) { flags |= DxbcShaderTranslator::kSysFlag_ConvertColor0ToGamma << i; } } } if (edram_rov_used && depth_stencil_enabled) { flags |= DxbcShaderTranslator::kSysFlag_ROVDepthStencil; if (normalized_depth_control.z_enable) { flags |= uint32_t(normalized_depth_control.zfunc) << DxbcShaderTranslator::kSysFlag_ROVDepthPassIfLess_Shift; if (normalized_depth_control.z_write_enable) { flags |= DxbcShaderTranslator::kSysFlag_ROVDepthWrite; } } else { // In case stencil is used without depth testing - always pass, and // don't modify the stored depth. 
      flags |= DxbcShaderTranslator::kSysFlag_ROVDepthPassIfLess |
               DxbcShaderTranslator::kSysFlag_ROVDepthPassIfEqual |
               DxbcShaderTranslator::kSysFlag_ROVDepthPassIfGreater;
    }
    if (normalized_depth_control.stencil_enable) {
      flags |= DxbcShaderTranslator::kSysFlag_ROVStencilTest;
    }
    // Hint - if not applicable to the shader, will not have effect.
    if (alpha_test_function == xenos::CompareFunction::kAlways &&
        !rb_colorcontrol.alpha_to_mask_enable) {
      flags |= DxbcShaderTranslator::kSysFlag_ROVDepthStencilEarlyWrite;
    }
  }
  dirty |= system_constants_.flags != flags;
  system_constants_.flags = flags;

  // Tessellation factor range, plus 1.0 according to the images in
  // https://www.slideshare.net/blackdevilvikas/next-generation-graphics-programming-on-xbox-360
  float tessellation_factor_min =
      regs[XE_GPU_REG_VGT_HOS_MIN_TESS_LEVEL].f32 + 1.0f;
  float tessellation_factor_max =
      regs[XE_GPU_REG_VGT_HOS_MAX_TESS_LEVEL].f32 + 1.0f;
  dirty |= system_constants_.tessellation_factor_range_min !=
           tessellation_factor_min;
  system_constants_.tessellation_factor_range_min = tessellation_factor_min;
  dirty |= system_constants_.tessellation_factor_range_max !=
           tessellation_factor_max;
  system_constants_.tessellation_factor_range_max = tessellation_factor_max;

  // Line loop closing index (or 0 when drawing other primitives or using an
  // index buffer).
  dirty |=
      system_constants_.line_loop_closing_index != line_loop_closing_index;
  system_constants_.line_loop_closing_index = line_loop_closing_index;

  // Index or tessellation edge factor buffer endianness.
  dirty |= system_constants_.vertex_index_endian != index_endian;
  system_constants_.vertex_index_endian = index_endian;

  // Vertex index offset.
  dirty |= system_constants_.vertex_index_offset != vgt_indx_offset;
  system_constants_.vertex_index_offset = vgt_indx_offset;

  // Vertex index range.
  dirty |= system_constants_.vertex_index_min != vgt_min_vtx_indx;
  dirty |= system_constants_.vertex_index_max != vgt_max_vtx_indx;
  system_constants_.vertex_index_min = vgt_min_vtx_indx;
  system_constants_.vertex_index_max = vgt_max_vtx_indx;

  // User clip planes (UCP_ENA_#), when not CLIP_DISABLE.
  if (!pa_cl_clip_cntl.clip_disable) {
    for (uint32_t i = 0; i < 6; ++i) {
      if (!(pa_cl_clip_cntl.value & (1 << i))) {
        continue;
      }
      const float* ucp = &regs[XE_GPU_REG_PA_CL_UCP_0_X + i * 4].f32;
      if (std::memcmp(system_constants_.user_clip_planes[i], ucp,
                      4 * sizeof(float))) {
        dirty = true;
        std::memcpy(system_constants_.user_clip_planes[i], ucp,
                    4 * sizeof(float));
      }
    }
  }

  // Conversion to Direct3D 12 normalized device coordinates.
  for (uint32_t i = 0; i < 3; ++i) {
    dirty |= system_constants_.ndc_scale[i] != viewport_info.ndc_scale[i];
    dirty |= system_constants_.ndc_offset[i] != viewport_info.ndc_offset[i];
    system_constants_.ndc_scale[i] = viewport_info.ndc_scale[i];
    system_constants_.ndc_offset[i] = viewport_info.ndc_offset[i];
  }

  // Point size.
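  // The (2.0f / 16.0f) factor below converts the point size registers, which
  // hold half-extents in 12.4 fixed point, to whole-pixel diameters: 1/16
  // undoes the fixed-point scaling, and the remaining 2 turns the half-size
  // into a full size. For example, a raw width of 24 is 24/16 = 1.5 pixels of
  // half-extent, i.e. a 3-pixel-wide point.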
  float point_vertex_diameter_min =
      float(pa_su_point_minmax.min_size) * (2.0f / 16.0f);
  float point_vertex_diameter_max =
      float(pa_su_point_minmax.max_size) * (2.0f / 16.0f);
  float point_constant_diameter_x =
      float(pa_su_point_size.width) * (2.0f / 16.0f);
  float point_constant_diameter_y =
      float(pa_su_point_size.height) * (2.0f / 16.0f);
  dirty |= system_constants_.point_vertex_diameter_min !=
           point_vertex_diameter_min;
  dirty |= system_constants_.point_vertex_diameter_max !=
           point_vertex_diameter_max;
  dirty |= system_constants_.point_constant_diameter[0] !=
           point_constant_diameter_x;
  dirty |= system_constants_.point_constant_diameter[1] !=
           point_constant_diameter_y;
  system_constants_.point_vertex_diameter_min = point_vertex_diameter_min;
  system_constants_.point_vertex_diameter_max = point_vertex_diameter_max;
  system_constants_.point_constant_diameter[0] = point_constant_diameter_x;
  system_constants_.point_constant_diameter[1] = point_constant_diameter_y;
  // 2 because 1 in the NDC is half of the viewport's axis, 0.5 for diameter to
  // radius conversion to avoid multiplying the per-vertex diameter by an
  // additional constant in the shader.
  float point_screen_diameter_to_ndc_radius_x =
      (/* 0.5f * 2.0f * */ float(draw_resolution_scale_x)) /
      std::max(viewport_info.xy_extent[0], uint32_t(1));
  float point_screen_diameter_to_ndc_radius_y =
      (/* 0.5f * 2.0f * */ float(draw_resolution_scale_y)) /
      std::max(viewport_info.xy_extent[1], uint32_t(1));
  dirty |= system_constants_.point_screen_diameter_to_ndc_radius[0] !=
           point_screen_diameter_to_ndc_radius_x;
  dirty |= system_constants_.point_screen_diameter_to_ndc_radius[1] !=
           point_screen_diameter_to_ndc_radius_y;
  system_constants_.point_screen_diameter_to_ndc_radius[0] =
      point_screen_diameter_to_ndc_radius_x;
  system_constants_.point_screen_diameter_to_ndc_radius[1] =
      point_screen_diameter_to_ndc_radius_y;

  // Interpolator sampling pattern, centroid or center.
  uint32_t interpolator_sampling_pattern =
      xenos::GetInterpolatorSamplingPattern(
          rb_surface_info.msaa_samples, sq_context_misc.sc_sample_cntl,
          regs.Get<reg::SQ_INTERPOLATOR_CNTL>().sampling_pattern);
  dirty |= system_constants_.interpolator_sampling_pattern !=
           interpolator_sampling_pattern;
  system_constants_.interpolator_sampling_pattern =
      interpolator_sampling_pattern;

  // Pixel parameter register.
  uint32_t ps_param_gen =
      sq_program_cntl.param_gen ? sq_context_misc.param_gen_pos : UINT_MAX;
  dirty |= system_constants_.ps_param_gen != ps_param_gen;
  system_constants_.ps_param_gen = ps_param_gen;

  // Texture signedness / gamma.
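  // Each texture gets one byte in texture_swizzled_signs (two sign bits per
  // component), so four textures are packed per uint32_t - hence the
  // texture_index >> 2 dword index and the (texture_index & 3) * 8 bit shift
  // in the loop below.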
bool gamma_render_target_as_srgb = render_target_cache_->gamma_render_target_as_srgb(); uint32_t textures_resolved = 0; uint32_t textures_remaining = used_texture_mask; uint32_t texture_index; while (xe::bit_scan_forward(textures_remaining, &texture_index)) { textures_remaining &= ~(uint32_t(1) << texture_index); uint32_t& texture_signs_uint = system_constants_.texture_swizzled_signs[texture_index >> 2]; uint32_t texture_signs_shift = (texture_index & 3) * 8; uint8_t texture_signs = texture_cache_->GetActiveTextureSwizzledSigns(texture_index); uint32_t texture_signs_shifted = uint32_t(texture_signs) << texture_signs_shift; uint32_t texture_signs_mask = uint32_t(0b11111111) << texture_signs_shift; dirty |= (texture_signs_uint & texture_signs_mask) != texture_signs_shifted; texture_signs_uint = (texture_signs_uint & ~texture_signs_mask) | texture_signs_shifted; textures_resolved |= uint32_t(texture_cache_->IsActiveTextureResolved(texture_index)) << texture_index; } dirty |= system_constants_.textures_resolved != textures_resolved; system_constants_.textures_resolved = textures_resolved; // Log2 of sample count, for alpha to mask and with ROV, for EDRAM address // calculation with MSAA. uint32_t sample_count_log2_x = rb_surface_info.msaa_samples >= xenos::MsaaSamples::k4X ? 1 : 0; uint32_t sample_count_log2_y = rb_surface_info.msaa_samples >= xenos::MsaaSamples::k2X ? 1 : 0; dirty |= system_constants_.sample_count_log2[0] != sample_count_log2_x; dirty |= system_constants_.sample_count_log2[1] != sample_count_log2_y; system_constants_.sample_count_log2[0] = sample_count_log2_x; system_constants_.sample_count_log2[1] = sample_count_log2_y; // Alpha test and alpha to coverage. dirty |= system_constants_.alpha_test_reference != rb_alpha_ref; system_constants_.alpha_test_reference = rb_alpha_ref; uint32_t alpha_to_mask = rb_colorcontrol.alpha_to_mask_enable ? (rb_colorcontrol.value >> 24) | (1 << 8) : 0; dirty |= system_constants_.alpha_to_mask != alpha_to_mask; system_constants_.alpha_to_mask = alpha_to_mask; uint32_t edram_tile_dwords_scaled = xenos::kEdramTileWidthSamples * xenos::kEdramTileHeightSamples * (draw_resolution_scale_x * draw_resolution_scale_y); // EDRAM pitch for ROV writing. if (edram_rov_used) { // Align, then multiply by 32bpp tile size in dwords. uint32_t edram_32bpp_tile_pitch_dwords_scaled = ((rb_surface_info.surface_pitch * (rb_surface_info.msaa_samples >= xenos::MsaaSamples::k4X ? 2 : 1)) + (xenos::kEdramTileWidthSamples - 1)) / xenos::kEdramTileWidthSamples * edram_tile_dwords_scaled; dirty |= system_constants_.edram_32bpp_tile_pitch_dwords_scaled != edram_32bpp_tile_pitch_dwords_scaled; system_constants_.edram_32bpp_tile_pitch_dwords_scaled = edram_32bpp_tile_pitch_dwords_scaled; } // Color exponent bias and ROV render target writing. for (uint32_t i = 0; i < 4; ++i) { reg::RB_COLOR_INFO color_info = color_infos[i]; // Exponent bias is in bits 20:25 of RB_COLOR_INFO. int32_t color_exp_bias = color_info.color_exp_bias; if (color_info.color_format == xenos::ColorRenderTargetFormat::k_16_16 || color_info.color_format == xenos::ColorRenderTargetFormat::k_16_16_16_16) { if (render_target_cache_->GetPath() == RenderTargetCache::Path::kHostRenderTargets && !render_target_cache_->IsFixed16TruncatedToMinus1To1()) { // Remap from -32...32 to -1...1 by dividing the output values by 32, // losing blending correctness, but getting the full range. 
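        // 2^-5 = 1/32, so subtracting 5 from the exponent bias is exactly
        // that division by 32 folded into the existing exponent scale.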
        color_exp_bias -= 5;
      }
    }
    float color_exp_bias_scale;
    *reinterpret_cast<int32_t*>(&color_exp_bias_scale) =
        0x3F800000 + (color_exp_bias << 23);
    dirty |= system_constants_.color_exp_bias[i] != color_exp_bias_scale;
    system_constants_.color_exp_bias[i] = color_exp_bias_scale;
    if (edram_rov_used) {
      dirty |=
          system_constants_.edram_rt_keep_mask[i][0] != rt_keep_masks[i][0];
      system_constants_.edram_rt_keep_mask[i][0] = rt_keep_masks[i][0];
      dirty |=
          system_constants_.edram_rt_keep_mask[i][1] != rt_keep_masks[i][1];
      system_constants_.edram_rt_keep_mask[i][1] = rt_keep_masks[i][1];
      if (rt_keep_masks[i][0] != UINT32_MAX ||
          rt_keep_masks[i][1] != UINT32_MAX) {
        uint32_t rt_base_dwords_scaled =
            color_info.color_base * edram_tile_dwords_scaled;
        dirty |= system_constants_.edram_rt_base_dwords_scaled[i] !=
                 rt_base_dwords_scaled;
        system_constants_.edram_rt_base_dwords_scaled[i] =
            rt_base_dwords_scaled;
        uint32_t format_flags = DxbcShaderTranslator::ROV_AddColorFormatFlags(
            color_info.color_format);
        dirty |= system_constants_.edram_rt_format_flags[i] != format_flags;
        system_constants_.edram_rt_format_flags[i] = format_flags;
        // Can't do float comparisons here because NaNs would result in always
        // setting the dirty flag.
        dirty |= std::memcmp(system_constants_.edram_rt_clamp[i], rt_clamp[i],
                             4 * sizeof(float)) != 0;
        std::memcpy(system_constants_.edram_rt_clamp[i], rt_clamp[i],
                    4 * sizeof(float));
        uint32_t blend_factors_ops =
            regs[reg::RB_BLENDCONTROL::rt_register_indices[i]].u32 &
            0x1FFF1FFF;
        dirty |= system_constants_.edram_rt_blend_factors_ops[i] !=
                 blend_factors_ops;
        system_constants_.edram_rt_blend_factors_ops[i] = blend_factors_ops;
      }
    }
  }

  if (edram_rov_used) {
    uint32_t depth_base_dwords_scaled =
        rb_depth_info.depth_base * edram_tile_dwords_scaled;
    dirty |= system_constants_.edram_depth_base_dwords_scaled !=
             depth_base_dwords_scaled;
    system_constants_.edram_depth_base_dwords_scaled =
        depth_base_dwords_scaled;

    // For non-polygons, the front polygon offset is used, enabled via
    // POLY_OFFSET_PARA_ENABLED; for polygons, separate front and back
    // settings are used.
    float poly_offset_front_scale = 0.0f, poly_offset_front_offset = 0.0f;
    float poly_offset_back_scale = 0.0f, poly_offset_back_offset = 0.0f;
    if (primitive_polygonal) {
      if (pa_su_sc_mode_cntl.poly_offset_front_enable) {
        poly_offset_front_scale =
            regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE].f32;
        poly_offset_front_offset =
            regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET].f32;
      }
      if (pa_su_sc_mode_cntl.poly_offset_back_enable) {
        poly_offset_back_scale =
            regs[XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_SCALE].f32;
        poly_offset_back_offset =
            regs[XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_OFFSET].f32;
      }
    } else {
      if (pa_su_sc_mode_cntl.poly_offset_para_enable) {
        poly_offset_front_scale =
            regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE].f32;
        poly_offset_front_offset =
            regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET].f32;
        poly_offset_back_scale = poly_offset_front_scale;
        poly_offset_back_offset = poly_offset_front_offset;
      }
    }
    // With non-square resolution scaling, make sure the worst-case impact is
    // reverted (slope only along the scaled axis), thus max. More bias is
    // better than less bias, because less bias means Z fighting with the
    // background is more likely.
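    // For example, with a 3x1 resolution scale, max(3, 1) = 3 is applied to
    // both the front and back slope scales, which covers the worst case of a
    // slope running entirely along the 3x-scaled axis.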
float poly_offset_scale_factor = xenos::kPolygonOffsetScaleSubpixelUnit * std::max(draw_resolution_scale_x, draw_resolution_scale_y); poly_offset_front_scale *= poly_offset_scale_factor; poly_offset_back_scale *= poly_offset_scale_factor; dirty |= system_constants_.edram_poly_offset_front_scale != poly_offset_front_scale; system_constants_.edram_poly_offset_front_scale = poly_offset_front_scale; dirty |= system_constants_.edram_poly_offset_front_offset != poly_offset_front_offset; system_constants_.edram_poly_offset_front_offset = poly_offset_front_offset; dirty |= system_constants_.edram_poly_offset_back_scale != poly_offset_back_scale; system_constants_.edram_poly_offset_back_scale = poly_offset_back_scale; dirty |= system_constants_.edram_poly_offset_back_offset != poly_offset_back_offset; system_constants_.edram_poly_offset_back_offset = poly_offset_back_offset; if (depth_stencil_enabled && normalized_depth_control.stencil_enable) { dirty |= system_constants_.edram_stencil_front_reference != rb_stencilrefmask.stencilref; system_constants_.edram_stencil_front_reference = rb_stencilrefmask.stencilref; dirty |= system_constants_.edram_stencil_front_read_mask != rb_stencilrefmask.stencilmask; system_constants_.edram_stencil_front_read_mask = rb_stencilrefmask.stencilmask; dirty |= system_constants_.edram_stencil_front_write_mask != rb_stencilrefmask.stencilwritemask; system_constants_.edram_stencil_front_write_mask = rb_stencilrefmask.stencilwritemask; uint32_t stencil_func_ops = (normalized_depth_control.value >> 8) & ((1 << 12) - 1); dirty |= system_constants_.edram_stencil_front_func_ops != stencil_func_ops; system_constants_.edram_stencil_front_func_ops = stencil_func_ops; if (primitive_polygonal && normalized_depth_control.backface_enable) { dirty |= system_constants_.edram_stencil_back_reference != rb_stencilrefmask_bf.stencilref; system_constants_.edram_stencil_back_reference = rb_stencilrefmask_bf.stencilref; dirty |= system_constants_.edram_stencil_back_read_mask != rb_stencilrefmask_bf.stencilmask; system_constants_.edram_stencil_back_read_mask = rb_stencilrefmask_bf.stencilmask; dirty |= system_constants_.edram_stencil_back_write_mask != rb_stencilrefmask_bf.stencilwritemask; system_constants_.edram_stencil_back_write_mask = rb_stencilrefmask_bf.stencilwritemask; uint32_t stencil_func_ops_bf = (normalized_depth_control.value >> 20) & ((1 << 12) - 1); dirty |= system_constants_.edram_stencil_back_func_ops != stencil_func_ops_bf; system_constants_.edram_stencil_back_func_ops = stencil_func_ops_bf; } else { dirty |= std::memcmp(system_constants_.edram_stencil_back, system_constants_.edram_stencil_front, 4 * sizeof(uint32_t)) != 0; std::memcpy(system_constants_.edram_stencil_back, system_constants_.edram_stencil_front, 4 * sizeof(uint32_t)); } } dirty |= system_constants_.edram_blend_constant[0] != regs[XE_GPU_REG_RB_BLEND_RED].f32; system_constants_.edram_blend_constant[0] = regs[XE_GPU_REG_RB_BLEND_RED].f32; dirty |= system_constants_.edram_blend_constant[1] != regs[XE_GPU_REG_RB_BLEND_GREEN].f32; system_constants_.edram_blend_constant[1] = regs[XE_GPU_REG_RB_BLEND_GREEN].f32; dirty |= system_constants_.edram_blend_constant[2] != regs[XE_GPU_REG_RB_BLEND_BLUE].f32; system_constants_.edram_blend_constant[2] = regs[XE_GPU_REG_RB_BLEND_BLUE].f32; dirty |= system_constants_.edram_blend_constant[3] != regs[XE_GPU_REG_RB_BLEND_ALPHA].f32; system_constants_.edram_blend_constant[3] = regs[XE_GPU_REG_RB_BLEND_ALPHA].f32; } cbuffer_binding_system_.up_to_date &= !dirty; } bool 
D3D12CommandProcessor::UpdateBindings(const D3D12Shader* vertex_shader,
                                      const D3D12Shader* pixel_shader,
                                      ID3D12RootSignature* root_signature) {
  const ui::d3d12::D3D12Provider& provider = GetD3D12Provider();
  ID3D12Device* device = provider.GetDevice();
  const RegisterFile& regs = *register_file_;

#if XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES
  SCOPE_profile_cpu_f("gpu");
#endif  // XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES

  // Set the new root signature.
  if (current_graphics_root_signature_ != root_signature) {
    current_graphics_root_signature_ = root_signature;
    if (!bindless_resources_used_) {
      GetRootBindfulExtraParameterIndices(
          vertex_shader, pixel_shader, current_graphics_root_bindful_extras_);
    }
    // Changing the root signature invalidates all bindings.
    current_graphics_root_up_to_date_ = 0;
    deferred_command_list_.D3DSetGraphicsRootSignature(root_signature);
  }

  // Select the root parameter indices depending on the used binding model.
  uint32_t root_parameter_fetch_constants =
      bindless_resources_used_ ? kRootParameter_Bindless_FetchConstants
                               : kRootParameter_Bindful_FetchConstants;
  uint32_t root_parameter_float_constants_vertex =
      bindless_resources_used_ ? kRootParameter_Bindless_FloatConstantsVertex
                               : kRootParameter_Bindful_FloatConstantsVertex;
  uint32_t root_parameter_float_constants_pixel =
      bindless_resources_used_ ? kRootParameter_Bindless_FloatConstantsPixel
                               : kRootParameter_Bindful_FloatConstantsPixel;
  uint32_t root_parameter_system_constants =
      bindless_resources_used_ ? kRootParameter_Bindless_SystemConstants
                               : kRootParameter_Bindful_SystemConstants;
  uint32_t root_parameter_bool_loop_constants =
      bindless_resources_used_ ? kRootParameter_Bindless_BoolLoopConstants
                               : kRootParameter_Bindful_BoolLoopConstants;

  //
  // Update root constant buffers that are common for bindful and bindless.
  //

  // These are the constant base addresses/ranges for shaders. They are
  // hardcoded for now because nothing seems to differ on the Xbox 360
  // (OpenGL ES on Adreno 200 on Android, however, has different ranges).
  assert_true(regs[XE_GPU_REG_SQ_VS_CONST].u32 == 0x000FF000 ||
              regs[XE_GPU_REG_SQ_VS_CONST].u32 == 0x00000000);
  assert_true(regs[XE_GPU_REG_SQ_PS_CONST].u32 == 0x000FF100 ||
              regs[XE_GPU_REG_SQ_PS_CONST].u32 == 0x00000000);

  // Check if the float constant layout is still the same and get the counts.
  const Shader::ConstantRegisterMap& float_constant_map_vertex =
      vertex_shader->constant_register_map();
  uint32_t float_constant_count_vertex = float_constant_map_vertex.float_count;
  for (uint32_t i = 0; i < 4; ++i) {
    if (current_float_constant_map_vertex_[i] !=
        float_constant_map_vertex.float_bitmap[i]) {
      current_float_constant_map_vertex_[i] =
          float_constant_map_vertex.float_bitmap[i];
      // If the shader uses no float constants at all, any buffer can be
      // reused for them, so don't invalidate the binding.
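      // float_bitmap is 4 x 64 bits covering the 256 float constant
      // registers; a set bit means the translated shader actually reads that
      // register. The constant buffers written later contain only those
      // registers, packed densely in ascending order, so any bitmap change
      // requires a re-upload.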
      if (float_constant_count_vertex) {
        cbuffer_binding_float_vertex_.up_to_date = false;
      }
    }
  }
  uint32_t float_constant_count_pixel = 0;
  if (pixel_shader != nullptr) {
    const Shader::ConstantRegisterMap& float_constant_map_pixel =
        pixel_shader->constant_register_map();
    float_constant_count_pixel = float_constant_map_pixel.float_count;
    for (uint32_t i = 0; i < 4; ++i) {
      if (current_float_constant_map_pixel_[i] !=
          float_constant_map_pixel.float_bitmap[i]) {
        current_float_constant_map_pixel_[i] =
            float_constant_map_pixel.float_bitmap[i];
        if (float_constant_count_pixel) {
          cbuffer_binding_float_pixel_.up_to_date = false;
        }
      }
    }
  } else {
    std::memset(current_float_constant_map_pixel_, 0,
                sizeof(current_float_constant_map_pixel_));
  }

  // Write the constant buffer data.
  if (!cbuffer_binding_system_.up_to_date) {
    uint8_t* system_constants = constant_buffer_pool_->Request(
        frame_current_, sizeof(system_constants_),
        D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT, nullptr, nullptr,
        &cbuffer_binding_system_.address);
    if (system_constants == nullptr) {
      return false;
    }
    std::memcpy(system_constants, &system_constants_,
                sizeof(system_constants_));
    cbuffer_binding_system_.up_to_date = true;
    current_graphics_root_up_to_date_ &=
        ~(1u << root_parameter_system_constants);
  }
  if (!cbuffer_binding_float_vertex_.up_to_date) {
    // Even if the shader doesn't need any float constants, a valid binding
    // must still be provided, so if the first draw in the frame with the
    // current root signature doesn't have float constants at all, still
    // allocate an empty buffer.
    uint8_t* float_constants = constant_buffer_pool_->Request(
        frame_current_,
        sizeof(float) * 4 * std::max(float_constant_count_vertex, uint32_t(1)),
        D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT, nullptr, nullptr,
        &cbuffer_binding_float_vertex_.address);
    if (float_constants == nullptr) {
      return false;
    }
    for (uint32_t i = 0; i < 4; ++i) {
      uint64_t float_constant_map_entry =
          float_constant_map_vertex.float_bitmap[i];
      uint32_t float_constant_index;
      while (xe::bit_scan_forward(float_constant_map_entry,
                                  &float_constant_index)) {
        float_constant_map_entry &= ~(1ull << float_constant_index);
        std::memcpy(float_constants,
                    &regs[XE_GPU_REG_SHADER_CONSTANT_000_X + (i << 8) +
                          (float_constant_index << 2)]
                         .f32,
                    4 * sizeof(float));
        float_constants += 4 * sizeof(float);
      }
    }
    cbuffer_binding_float_vertex_.up_to_date = true;
    current_graphics_root_up_to_date_ &=
        ~(1u << root_parameter_float_constants_vertex);
  }
  if (!cbuffer_binding_float_pixel_.up_to_date) {
    uint8_t* float_constants = constant_buffer_pool_->Request(
        frame_current_,
        sizeof(float) * 4 * std::max(float_constant_count_pixel, uint32_t(1)),
        D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT, nullptr, nullptr,
        &cbuffer_binding_float_pixel_.address);
    if (float_constants == nullptr) {
      return false;
    }
    if (pixel_shader != nullptr) {
      const Shader::ConstantRegisterMap& float_constant_map_pixel =
          pixel_shader->constant_register_map();
      for (uint32_t i = 0; i < 4; ++i) {
        uint64_t float_constant_map_entry =
            float_constant_map_pixel.float_bitmap[i];
        uint32_t float_constant_index;
        while (xe::bit_scan_forward(float_constant_map_entry,
                                    &float_constant_index)) {
          float_constant_map_entry &= ~(1ull << float_constant_index);
          std::memcpy(float_constants,
                      &regs[XE_GPU_REG_SHADER_CONSTANT_256_X + (i << 8) +
                            (float_constant_index << 2)]
                           .f32,
                      4 * sizeof(float));
          float_constants += 4 * sizeof(float);
        }
      }
    }
    cbuffer_binding_float_pixel_.up_to_date = true;
    current_graphics_root_up_to_date_ &=
        ~(1u << root_parameter_float_constants_pixel);
  }
  if (!cbuffer_binding_bool_loop_.up_to_date) {
    constexpr uint32_t kBoolLoopConstantsSize = (8 + 32) * sizeof(uint32_t);
    uint8_t* bool_loop_constants = constant_buffer_pool_->Request(
        frame_current_, kBoolLoopConstantsSize,
        D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT, nullptr, nullptr,
        &cbuffer_binding_bool_loop_.address);
    if (bool_loop_constants == nullptr) {
      return false;
    }
    std::memcpy(bool_loop_constants,
                &regs[XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031].u32,
                kBoolLoopConstantsSize);
    cbuffer_binding_bool_loop_.up_to_date = true;
    current_graphics_root_up_to_date_ &=
        ~(1u << root_parameter_bool_loop_constants);
  }
  if (!cbuffer_binding_fetch_.up_to_date) {
    constexpr uint32_t kFetchConstantsSize = 32 * 6 * sizeof(uint32_t);
    uint8_t* fetch_constants = constant_buffer_pool_->Request(
        frame_current_, kFetchConstantsSize,
        D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT, nullptr, nullptr,
        &cbuffer_binding_fetch_.address);
    if (fetch_constants == nullptr) {
      return false;
    }
    std::memcpy(fetch_constants,
                &regs[XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0].u32,
                kFetchConstantsSize);
    cbuffer_binding_fetch_.up_to_date = true;
    current_graphics_root_up_to_date_ &=
        ~(1u << root_parameter_fetch_constants);
  }

  //
  // Update descriptors.
  //

  // Get textures and samplers used by the vertex shader, check if the last
  // used samplers are compatible and update them.
  size_t texture_layout_uid_vertex =
      vertex_shader->GetTextureBindingLayoutUserUID();
  size_t sampler_layout_uid_vertex =
      vertex_shader->GetSamplerBindingLayoutUserUID();
  const std::vector<D3D12Shader::TextureBinding>& textures_vertex =
      vertex_shader->GetTextureBindingsAfterTranslation();
  const std::vector<D3D12Shader::SamplerBinding>& samplers_vertex =
      vertex_shader->GetSamplerBindingsAfterTranslation();
  size_t texture_count_vertex = textures_vertex.size();
  size_t sampler_count_vertex = samplers_vertex.size();
  if (sampler_count_vertex) {
    if (current_sampler_layout_uid_vertex_ != sampler_layout_uid_vertex) {
      current_sampler_layout_uid_vertex_ = sampler_layout_uid_vertex;
      cbuffer_binding_descriptor_indices_vertex_.up_to_date = false;
      bindful_samplers_written_vertex_ = false;
    }
    current_samplers_vertex_.resize(
        std::max(current_samplers_vertex_.size(), sampler_count_vertex));
    for (size_t i = 0; i < sampler_count_vertex; ++i) {
      D3D12TextureCache::SamplerParameters parameters =
          texture_cache_->GetSamplerParameters(samplers_vertex[i]);
      if (current_samplers_vertex_[i] != parameters) {
        cbuffer_binding_descriptor_indices_vertex_.up_to_date = false;
        bindful_samplers_written_vertex_ = false;
        current_samplers_vertex_[i] = parameters;
      }
    }
  }

  // Get textures and samplers used by the pixel shader, check if the last
  // used samplers are compatible and update them.
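  // As with the vertex shader above, the layout UID is a cheap first-level
  // check: if the binding layout object is unchanged since the last draw,
  // only the individual sampler parameters have to be re-compared.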
  size_t texture_layout_uid_pixel, sampler_layout_uid_pixel;
  const std::vector<D3D12Shader::TextureBinding>* textures_pixel;
  const std::vector<D3D12Shader::SamplerBinding>* samplers_pixel;
  size_t texture_count_pixel, sampler_count_pixel;
  if (pixel_shader != nullptr) {
    texture_layout_uid_pixel = pixel_shader->GetTextureBindingLayoutUserUID();
    sampler_layout_uid_pixel = pixel_shader->GetSamplerBindingLayoutUserUID();
    textures_pixel = &pixel_shader->GetTextureBindingsAfterTranslation();
    texture_count_pixel = textures_pixel->size();
    samplers_pixel = &pixel_shader->GetSamplerBindingsAfterTranslation();
    sampler_count_pixel = samplers_pixel->size();
    if (sampler_count_pixel) {
      if (current_sampler_layout_uid_pixel_ != sampler_layout_uid_pixel) {
        current_sampler_layout_uid_pixel_ = sampler_layout_uid_pixel;
        cbuffer_binding_descriptor_indices_pixel_.up_to_date = false;
        bindful_samplers_written_pixel_ = false;
      }
      current_samplers_pixel_.resize(std::max(current_samplers_pixel_.size(),
                                              size_t(sampler_count_pixel)));
      for (uint32_t i = 0; i < sampler_count_pixel; ++i) {
        D3D12TextureCache::SamplerParameters parameters =
            texture_cache_->GetSamplerParameters((*samplers_pixel)[i]);
        if (current_samplers_pixel_[i] != parameters) {
          current_samplers_pixel_[i] = parameters;
          cbuffer_binding_descriptor_indices_pixel_.up_to_date = false;
          bindful_samplers_written_pixel_ = false;
        }
      }
    }
  } else {
    texture_layout_uid_pixel = PipelineCache::kLayoutUIDEmpty;
    sampler_layout_uid_pixel = PipelineCache::kLayoutUIDEmpty;
    textures_pixel = nullptr;
    texture_count_pixel = 0;
    samplers_pixel = nullptr;
    sampler_count_pixel = 0;
  }

  assert_true(sampler_count_vertex + sampler_count_pixel <= kSamplerHeapSize);

  if (bindless_resources_used_) {
    //
    // Bindless descriptors path.
    //

    // Check whether new descriptor indices need to be written.
    // Samplers have already been checked.
    if (texture_count_vertex &&
        cbuffer_binding_descriptor_indices_vertex_.up_to_date &&
        (current_texture_layout_uid_vertex_ != texture_layout_uid_vertex ||
         !texture_cache_->AreActiveTextureSRVKeysUpToDate(
             current_texture_srv_keys_vertex_.data(), textures_vertex.data(),
             texture_count_vertex))) {
      cbuffer_binding_descriptor_indices_vertex_.up_to_date = false;
    }
    if (texture_count_pixel &&
        cbuffer_binding_descriptor_indices_pixel_.up_to_date &&
        (current_texture_layout_uid_pixel_ != texture_layout_uid_pixel ||
         !texture_cache_->AreActiveTextureSRVKeysUpToDate(
             current_texture_srv_keys_pixel_.data(), textures_pixel->data(),
             texture_count_pixel))) {
      cbuffer_binding_descriptor_indices_pixel_.up_to_date = false;
    }
    // Get sampler descriptor indices, write new samplers, and handle sampler
    // heap overflow if it happens.
    if ((sampler_count_vertex &&
         !cbuffer_binding_descriptor_indices_vertex_.up_to_date) ||
        (sampler_count_pixel &&
         !cbuffer_binding_descriptor_indices_pixel_.up_to_date)) {
      for (uint32_t i = 0; i < 2; ++i) {
        if (i) {
          // Overflow happened - invalidate sampler bindings because their
          // descriptor indices can't be used anymore (and even if heap
          // creation fails, because current_sampler_bindless_indices_#_ are
          // in an undefined state now) and switch to a new sampler heap.
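          // At most two passes are made: the first tries to allocate from the
          // current sampler heap, and on overflow this branch switches to a
          // fresh heap before the single retry. One draw always fits, since
          // sampler_count_vertex + sampler_count_pixel <= kSamplerHeapSize is
          // asserted above.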
cbuffer_binding_descriptor_indices_vertex_.up_to_date = false; cbuffer_binding_descriptor_indices_pixel_.up_to_date = false; ID3D12DescriptorHeap* sampler_heap_new; if (!sampler_bindless_heaps_overflowed_.empty() && sampler_bindless_heaps_overflowed_.front().second <= submission_completed_) { sampler_heap_new = sampler_bindless_heaps_overflowed_.front().first; sampler_bindless_heaps_overflowed_.pop_front(); } else { D3D12_DESCRIPTOR_HEAP_DESC sampler_heap_new_desc; sampler_heap_new_desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER; sampler_heap_new_desc.NumDescriptors = kSamplerHeapSize; sampler_heap_new_desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE; sampler_heap_new_desc.NodeMask = 0; if (FAILED(device->CreateDescriptorHeap( &sampler_heap_new_desc, IID_PPV_ARGS(&sampler_heap_new)))) { XELOGE( "Failed to create a new bindless sampler descriptor heap " "after an overflow of the previous one"); return false; } } // Only change the heap if a new heap was created successfully, not to // leave the values in an undefined state in case CreateDescriptorHeap // has failed. sampler_bindless_heaps_overflowed_.push_back(std::make_pair( sampler_bindless_heap_current_, submission_current_)); sampler_bindless_heap_current_ = sampler_heap_new; sampler_bindless_heap_cpu_start_ = sampler_bindless_heap_current_ ->GetCPUDescriptorHandleForHeapStart(); sampler_bindless_heap_gpu_start_ = sampler_bindless_heap_current_ ->GetGPUDescriptorHandleForHeapStart(); sampler_bindless_heap_allocated_ = 0; // The only thing the heap is used for now is texture cache samplers - // invalidate all of them. texture_cache_bindless_sampler_map_.clear(); deferred_command_list_.SetDescriptorHeaps( view_bindless_heap_, sampler_bindless_heap_current_); current_graphics_root_up_to_date_ &= ~(1u << kRootParameter_Bindless_SamplerHeap); } bool samplers_overflowed = false; if (sampler_count_vertex && !cbuffer_binding_descriptor_indices_vertex_.up_to_date) { current_sampler_bindless_indices_vertex_.resize( std::max(current_sampler_bindless_indices_vertex_.size(), size_t(sampler_count_vertex))); for (uint32_t j = 0; j < sampler_count_vertex; ++j) { D3D12TextureCache::SamplerParameters sampler_parameters = current_samplers_vertex_[j]; uint32_t sampler_index; auto it = texture_cache_bindless_sampler_map_.find( sampler_parameters.value); if (it != texture_cache_bindless_sampler_map_.end()) { sampler_index = it->second; } else { if (sampler_bindless_heap_allocated_ >= kSamplerHeapSize) { samplers_overflowed = true; break; } sampler_index = sampler_bindless_heap_allocated_++; texture_cache_->WriteSampler( sampler_parameters, provider.OffsetSamplerDescriptor( sampler_bindless_heap_cpu_start_, sampler_index)); texture_cache_bindless_sampler_map_.emplace( sampler_parameters.value, sampler_index); } current_sampler_bindless_indices_vertex_[j] = sampler_index; } } if (samplers_overflowed) { continue; } if (sampler_count_pixel && !cbuffer_binding_descriptor_indices_pixel_.up_to_date) { current_sampler_bindless_indices_pixel_.resize( std::max(current_sampler_bindless_indices_pixel_.size(), size_t(sampler_count_pixel))); for (uint32_t j = 0; j < sampler_count_pixel; ++j) { D3D12TextureCache::SamplerParameters sampler_parameters = current_samplers_pixel_[j]; uint32_t sampler_index; auto it = texture_cache_bindless_sampler_map_.find( sampler_parameters.value); if (it != texture_cache_bindless_sampler_map_.end()) { sampler_index = it->second; } else { if (sampler_bindless_heap_allocated_ >= kSamplerHeapSize) { samplers_overflowed = true; break; } 
              sampler_index = sampler_bindless_heap_allocated_++;
              texture_cache_->WriteSampler(
                  sampler_parameters,
                  provider.OffsetSamplerDescriptor(
                      sampler_bindless_heap_cpu_start_, sampler_index));
              texture_cache_bindless_sampler_map_.emplace(
                  sampler_parameters.value, sampler_index);
            }
            current_sampler_bindless_indices_pixel_[j] = sampler_index;
          }
        }
        if (!samplers_overflowed) {
          break;
        }
      }
    }
    if (!cbuffer_binding_descriptor_indices_vertex_.up_to_date) {
      uint32_t* descriptor_indices =
          reinterpret_cast<uint32_t*>(constant_buffer_pool_->Request(
              frame_current_,
              std::max(texture_count_vertex + sampler_count_vertex,
                       size_t(1)) *
                  sizeof(uint32_t),
              D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT, nullptr, nullptr,
              &cbuffer_binding_descriptor_indices_vertex_.address));
      if (!descriptor_indices) {
        return false;
      }
      for (size_t i = 0; i < texture_count_vertex; ++i) {
        const D3D12Shader::TextureBinding& texture = textures_vertex[i];
        descriptor_indices[texture.bindless_descriptor_index] =
            texture_cache_->GetActiveTextureBindlessSRVIndex(texture) -
            uint32_t(SystemBindlessView::kUnboundedSRVsStart);
      }
      current_texture_layout_uid_vertex_ = texture_layout_uid_vertex;
      if (texture_count_vertex) {
        current_texture_srv_keys_vertex_.resize(
            std::max(current_texture_srv_keys_vertex_.size(),
                     size_t(texture_count_vertex)));
        texture_cache_->WriteActiveTextureSRVKeys(
            current_texture_srv_keys_vertex_.data(), textures_vertex.data(),
            texture_count_vertex);
      }
      // Current samplers have already been updated.
      for (size_t i = 0; i < sampler_count_vertex; ++i) {
        descriptor_indices[samplers_vertex[i].bindless_descriptor_index] =
            current_sampler_bindless_indices_vertex_[i];
      }
      cbuffer_binding_descriptor_indices_vertex_.up_to_date = true;
      current_graphics_root_up_to_date_ &=
          ~(1u << kRootParameter_Bindless_DescriptorIndicesVertex);
    }
    if (!cbuffer_binding_descriptor_indices_pixel_.up_to_date) {
      uint32_t* descriptor_indices =
          reinterpret_cast<uint32_t*>(constant_buffer_pool_->Request(
              frame_current_,
              std::max(texture_count_pixel + sampler_count_pixel, size_t(1)) *
                  sizeof(uint32_t),
              D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT, nullptr, nullptr,
              &cbuffer_binding_descriptor_indices_pixel_.address));
      if (!descriptor_indices) {
        return false;
      }
      for (size_t i = 0; i < texture_count_pixel; ++i) {
        const D3D12Shader::TextureBinding& texture = (*textures_pixel)[i];
        descriptor_indices[texture.bindless_descriptor_index] =
            texture_cache_->GetActiveTextureBindlessSRVIndex(texture) -
            uint32_t(SystemBindlessView::kUnboundedSRVsStart);
      }
      current_texture_layout_uid_pixel_ = texture_layout_uid_pixel;
      if (texture_count_pixel) {
        current_texture_srv_keys_pixel_.resize(
            std::max(current_texture_srv_keys_pixel_.size(),
                     size_t(texture_count_pixel)));
        texture_cache_->WriteActiveTextureSRVKeys(
            current_texture_srv_keys_pixel_.data(), textures_pixel->data(),
            texture_count_pixel);
      }
      // Current samplers have already been updated.
      for (size_t i = 0; i < sampler_count_pixel; ++i) {
        descriptor_indices[(*samplers_pixel)[i].bindless_descriptor_index] =
            current_sampler_bindless_indices_pixel_[i];
      }
      cbuffer_binding_descriptor_indices_pixel_.up_to_date = true;
      current_graphics_root_up_to_date_ &=
          ~(1u << kRootParameter_Bindless_DescriptorIndicesPixel);
    }
  } else {
    //
    // Bindful descriptors path.
    //

    // See what descriptors need to be updated.
    // Samplers have already been checked.
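    // A partial update only rewrites descriptors invalidated since the last
    // draw; however, if the allocation below lands on a different heap page,
    // every descriptor (including the shared memory and EDRAM ones) must be
    // rewritten, as GPU descriptor handles from the previous page can't be
    // reused.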
bool write_textures_vertex = texture_count_vertex && (!bindful_textures_written_vertex_ || current_texture_layout_uid_vertex_ != texture_layout_uid_vertex || !texture_cache_->AreActiveTextureSRVKeysUpToDate( current_texture_srv_keys_vertex_.data(), textures_vertex.data(), texture_count_vertex)); bool write_textures_pixel = texture_count_pixel && (!bindful_textures_written_pixel_ || current_texture_layout_uid_pixel_ != texture_layout_uid_pixel || !texture_cache_->AreActiveTextureSRVKeysUpToDate( current_texture_srv_keys_pixel_.data(), textures_pixel->data(), texture_count_pixel)); bool write_samplers_vertex = sampler_count_vertex && !bindful_samplers_written_vertex_; bool write_samplers_pixel = sampler_count_pixel && !bindful_samplers_written_pixel_; bool edram_rov_used = render_target_cache_->GetPath() == RenderTargetCache::Path::kPixelShaderInterlock; // Allocate the descriptors. size_t view_count_partial_update = 0; if (write_textures_vertex) { view_count_partial_update += texture_count_vertex; } if (write_textures_pixel) { view_count_partial_update += texture_count_pixel; } // All the constants + shared memory SRV and UAV + textures. size_t view_count_full_update = 2 + texture_count_vertex + texture_count_pixel; if (edram_rov_used) { // + EDRAM UAV. ++view_count_full_update; } D3D12_CPU_DESCRIPTOR_HANDLE view_cpu_handle; D3D12_GPU_DESCRIPTOR_HANDLE view_gpu_handle; uint32_t descriptor_size_view = provider.GetViewDescriptorSize(); uint64_t view_heap_index = RequestViewBindfulDescriptors( draw_view_bindful_heap_index_, uint32_t(view_count_partial_update), uint32_t(view_count_full_update), view_cpu_handle, view_gpu_handle); if (view_heap_index == ui::d3d12::D3D12DescriptorHeapPool::kHeapIndexInvalid) { XELOGE("Failed to allocate view descriptors"); return false; } size_t sampler_count_partial_update = 0; if (write_samplers_vertex) { sampler_count_partial_update += sampler_count_vertex; } if (write_samplers_pixel) { sampler_count_partial_update += sampler_count_pixel; } D3D12_CPU_DESCRIPTOR_HANDLE sampler_cpu_handle = {}; D3D12_GPU_DESCRIPTOR_HANDLE sampler_gpu_handle = {}; uint32_t descriptor_size_sampler = provider.GetSamplerDescriptorSize(); uint64_t sampler_heap_index = ui::d3d12::D3D12DescriptorHeapPool::kHeapIndexInvalid; if (sampler_count_vertex != 0 || sampler_count_pixel != 0) { sampler_heap_index = RequestSamplerBindfulDescriptors( draw_sampler_bindful_heap_index_, uint32_t(sampler_count_partial_update), uint32_t(sampler_count_vertex + sampler_count_pixel), sampler_cpu_handle, sampler_gpu_handle); if (sampler_heap_index == ui::d3d12::D3D12DescriptorHeapPool::kHeapIndexInvalid) { XELOGE("Failed to allocate sampler descriptors"); return false; } } if (draw_view_bindful_heap_index_ != view_heap_index) { // Need to update all view descriptors. write_textures_vertex = texture_count_vertex != 0; write_textures_pixel = texture_count_pixel != 0; bindful_textures_written_vertex_ = false; bindful_textures_written_pixel_ = false; // If updating fully, write the shared memory SRV and UAV descriptors and, // if needed, the EDRAM descriptor. 
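      // The full-update table layout written below is: shared memory SRV,
      // shared memory UAV, then optionally the EDRAM UAV - the 2 passed to
      // WriteEdramUintPow2UAVDescriptor is presumably log2 of the element
      // size in bytes, i.e. a uint32-typed view of the EDRAM buffer.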
gpu_handle_shared_memory_and_edram_ = view_gpu_handle; shared_memory_->WriteRawSRVDescriptor(view_cpu_handle); view_cpu_handle.ptr += descriptor_size_view; view_gpu_handle.ptr += descriptor_size_view; shared_memory_->WriteRawUAVDescriptor(view_cpu_handle); view_cpu_handle.ptr += descriptor_size_view; view_gpu_handle.ptr += descriptor_size_view; if (edram_rov_used) { render_target_cache_->WriteEdramUintPow2UAVDescriptor(view_cpu_handle, 2); view_cpu_handle.ptr += descriptor_size_view; view_gpu_handle.ptr += descriptor_size_view; } current_graphics_root_up_to_date_ &= ~(1u << kRootParameter_Bindful_SharedMemoryAndEdram); } if (sampler_heap_index != ui::d3d12::D3D12DescriptorHeapPool::kHeapIndexInvalid && draw_sampler_bindful_heap_index_ != sampler_heap_index) { write_samplers_vertex = sampler_count_vertex != 0; write_samplers_pixel = sampler_count_pixel != 0; bindful_samplers_written_vertex_ = false; bindful_samplers_written_pixel_ = false; } // Write the descriptors. if (write_textures_vertex) { assert_true(current_graphics_root_bindful_extras_.textures_vertex != RootBindfulExtraParameterIndices::kUnavailable); gpu_handle_textures_vertex_ = view_gpu_handle; for (size_t i = 0; i < texture_count_vertex; ++i) { texture_cache_->WriteActiveTextureBindfulSRV(textures_vertex[i], view_cpu_handle); view_cpu_handle.ptr += descriptor_size_view; view_gpu_handle.ptr += descriptor_size_view; } current_texture_layout_uid_vertex_ = texture_layout_uid_vertex; current_texture_srv_keys_vertex_.resize( std::max(current_texture_srv_keys_vertex_.size(), size_t(texture_count_vertex))); texture_cache_->WriteActiveTextureSRVKeys( current_texture_srv_keys_vertex_.data(), textures_vertex.data(), texture_count_vertex); bindful_textures_written_vertex_ = true; current_graphics_root_up_to_date_ &= ~(1u << current_graphics_root_bindful_extras_.textures_vertex); } if (write_textures_pixel) { assert_true(current_graphics_root_bindful_extras_.textures_pixel != RootBindfulExtraParameterIndices::kUnavailable); gpu_handle_textures_pixel_ = view_gpu_handle; for (size_t i = 0; i < texture_count_pixel; ++i) { texture_cache_->WriteActiveTextureBindfulSRV((*textures_pixel)[i], view_cpu_handle); view_cpu_handle.ptr += descriptor_size_view; view_gpu_handle.ptr += descriptor_size_view; } current_texture_layout_uid_pixel_ = texture_layout_uid_pixel; current_texture_srv_keys_pixel_.resize(std::max( current_texture_srv_keys_pixel_.size(), size_t(texture_count_pixel))); texture_cache_->WriteActiveTextureSRVKeys( current_texture_srv_keys_pixel_.data(), textures_pixel->data(), texture_count_pixel); bindful_textures_written_pixel_ = true; current_graphics_root_up_to_date_ &= ~(1u << current_graphics_root_bindful_extras_.textures_pixel); } if (write_samplers_vertex) { assert_true(current_graphics_root_bindful_extras_.samplers_vertex != RootBindfulExtraParameterIndices::kUnavailable); gpu_handle_samplers_vertex_ = sampler_gpu_handle; for (size_t i = 0; i < sampler_count_vertex; ++i) { texture_cache_->WriteSampler(current_samplers_vertex_[i], sampler_cpu_handle); sampler_cpu_handle.ptr += descriptor_size_sampler; sampler_gpu_handle.ptr += descriptor_size_sampler; } // Current samplers have already been updated. 
bindful_samplers_written_vertex_ = true; current_graphics_root_up_to_date_ &= ~(1u << current_graphics_root_bindful_extras_.samplers_vertex); } if (write_samplers_pixel) { assert_true(current_graphics_root_bindful_extras_.samplers_pixel != RootBindfulExtraParameterIndices::kUnavailable); gpu_handle_samplers_pixel_ = sampler_gpu_handle; for (size_t i = 0; i < sampler_count_pixel; ++i) { texture_cache_->WriteSampler(current_samplers_pixel_[i], sampler_cpu_handle); sampler_cpu_handle.ptr += descriptor_size_sampler; sampler_gpu_handle.ptr += descriptor_size_sampler; } // Current samplers have already been updated. bindful_samplers_written_pixel_ = true; current_graphics_root_up_to_date_ &= ~(1u << current_graphics_root_bindful_extras_.samplers_pixel); } // Wrote new descriptors on the current page. draw_view_bindful_heap_index_ = view_heap_index; if (sampler_heap_index != ui::d3d12::D3D12DescriptorHeapPool::kHeapIndexInvalid) { draw_sampler_bindful_heap_index_ = sampler_heap_index; } } // Update the root parameters. if (!(current_graphics_root_up_to_date_ & (1u << root_parameter_fetch_constants))) { deferred_command_list_.D3DSetGraphicsRootConstantBufferView( root_parameter_fetch_constants, cbuffer_binding_fetch_.address); current_graphics_root_up_to_date_ |= 1u << root_parameter_fetch_constants; } if (!(current_graphics_root_up_to_date_ & (1u << root_parameter_float_constants_vertex))) { deferred_command_list_.D3DSetGraphicsRootConstantBufferView( root_parameter_float_constants_vertex, cbuffer_binding_float_vertex_.address); current_graphics_root_up_to_date_ |= 1u << root_parameter_float_constants_vertex; } if (!(current_graphics_root_up_to_date_ & (1u << root_parameter_float_constants_pixel))) { deferred_command_list_.D3DSetGraphicsRootConstantBufferView( root_parameter_float_constants_pixel, cbuffer_binding_float_pixel_.address); current_graphics_root_up_to_date_ |= 1u << root_parameter_float_constants_pixel; } if (!(current_graphics_root_up_to_date_ & (1u << root_parameter_system_constants))) { deferred_command_list_.D3DSetGraphicsRootConstantBufferView( root_parameter_system_constants, cbuffer_binding_system_.address); current_graphics_root_up_to_date_ |= 1u << root_parameter_system_constants; } if (!(current_graphics_root_up_to_date_ & (1u << root_parameter_bool_loop_constants))) { deferred_command_list_.D3DSetGraphicsRootConstantBufferView( root_parameter_bool_loop_constants, cbuffer_binding_bool_loop_.address); current_graphics_root_up_to_date_ |= 1u << root_parameter_bool_loop_constants; } if (bindless_resources_used_) { if (!(current_graphics_root_up_to_date_ & (1u << kRootParameter_Bindless_DescriptorIndicesPixel))) { deferred_command_list_.D3DSetGraphicsRootConstantBufferView( kRootParameter_Bindless_DescriptorIndicesPixel, cbuffer_binding_descriptor_indices_pixel_.address); current_graphics_root_up_to_date_ |= 1u << kRootParameter_Bindless_DescriptorIndicesPixel; } if (!(current_graphics_root_up_to_date_ & (1u << kRootParameter_Bindless_DescriptorIndicesVertex))) { deferred_command_list_.D3DSetGraphicsRootConstantBufferView( kRootParameter_Bindless_DescriptorIndicesVertex, cbuffer_binding_descriptor_indices_vertex_.address); current_graphics_root_up_to_date_ |= 1u << kRootParameter_Bindless_DescriptorIndicesVertex; } if (!(current_graphics_root_up_to_date_ & (1u << kRootParameter_Bindless_SamplerHeap))) { deferred_command_list_.D3DSetGraphicsRootDescriptorTable( kRootParameter_Bindless_SamplerHeap, sampler_bindless_heap_gpu_start_); current_graphics_root_up_to_date_ |= 1u << 
kRootParameter_Bindless_SamplerHeap; } if (!(current_graphics_root_up_to_date_ & (1u << kRootParameter_Bindless_ViewHeap))) { deferred_command_list_.D3DSetGraphicsRootDescriptorTable( kRootParameter_Bindless_ViewHeap, view_bindless_heap_gpu_start_); current_graphics_root_up_to_date_ |= 1u << kRootParameter_Bindless_ViewHeap; } } else { if (!(current_graphics_root_up_to_date_ & (1u << kRootParameter_Bindful_SharedMemoryAndEdram))) { deferred_command_list_.D3DSetGraphicsRootDescriptorTable( kRootParameter_Bindful_SharedMemoryAndEdram, gpu_handle_shared_memory_and_edram_); current_graphics_root_up_to_date_ |= 1u << kRootParameter_Bindful_SharedMemoryAndEdram; } uint32_t extra_index; extra_index = current_graphics_root_bindful_extras_.textures_pixel; if (extra_index != RootBindfulExtraParameterIndices::kUnavailable && !(current_graphics_root_up_to_date_ & (1u << extra_index))) { deferred_command_list_.D3DSetGraphicsRootDescriptorTable( extra_index, gpu_handle_textures_pixel_); current_graphics_root_up_to_date_ |= 1u << extra_index; } extra_index = current_graphics_root_bindful_extras_.samplers_pixel; if (extra_index != RootBindfulExtraParameterIndices::kUnavailable && !(current_graphics_root_up_to_date_ & (1u << extra_index))) { deferred_command_list_.D3DSetGraphicsRootDescriptorTable( extra_index, gpu_handle_samplers_pixel_); current_graphics_root_up_to_date_ |= 1u << extra_index; } extra_index = current_graphics_root_bindful_extras_.textures_vertex; if (extra_index != RootBindfulExtraParameterIndices::kUnavailable && !(current_graphics_root_up_to_date_ & (1u << extra_index))) { deferred_command_list_.D3DSetGraphicsRootDescriptorTable( extra_index, gpu_handle_textures_vertex_); current_graphics_root_up_to_date_ |= 1u << extra_index; } extra_index = current_graphics_root_bindful_extras_.samplers_vertex; if (extra_index != RootBindfulExtraParameterIndices::kUnavailable && !(current_graphics_root_up_to_date_ & (1u << extra_index))) { deferred_command_list_.D3DSetGraphicsRootDescriptorTable( extra_index, gpu_handle_samplers_vertex_); current_graphics_root_up_to_date_ |= 1u << extra_index; } } return true; } uint32_t D3D12CommandProcessor::GetSupportedMemExportFormatSize( xenos::ColorFormat format) { switch (format) { case xenos::ColorFormat::k_8_8_8_8: case xenos::ColorFormat::k_2_10_10_10: // TODO(Triang3l): Investigate how k_8_8_8_8_A works - not supported in the // texture cache currently. 
    // case xenos::ColorFormat::k_8_8_8_8_A:
    case xenos::ColorFormat::k_10_11_11:
    case xenos::ColorFormat::k_11_11_10:
    case xenos::ColorFormat::k_16_16:
    case xenos::ColorFormat::k_16_16_FLOAT:
    case xenos::ColorFormat::k_32_FLOAT:
    case xenos::ColorFormat::k_8_8_8_8_AS_16_16_16_16:
    case xenos::ColorFormat::k_2_10_10_10_AS_16_16_16_16:
    case xenos::ColorFormat::k_10_11_11_AS_16_16_16_16:
    case xenos::ColorFormat::k_11_11_10_AS_16_16_16_16:
      return 1;
    case xenos::ColorFormat::k_16_16_16_16:
    case xenos::ColorFormat::k_16_16_16_16_FLOAT:
    case xenos::ColorFormat::k_32_32_FLOAT:
      return 2;
    case xenos::ColorFormat::k_32_32_32_32_FLOAT:
      return 4;
    default:
      break;
  }
  return 0;
}

ID3D12Resource* D3D12CommandProcessor::RequestReadbackBuffer(uint32_t size) {
  if (size == 0) {
    return nullptr;
  }
  size = xe::align(size, kReadbackBufferSizeIncrement);
  if (size > readback_buffer_size_) {
    const ui::d3d12::D3D12Provider& provider = GetD3D12Provider();
    ID3D12Device* device = provider.GetDevice();
    D3D12_RESOURCE_DESC buffer_desc;
    ui::d3d12::util::FillBufferResourceDesc(buffer_desc, size,
                                            D3D12_RESOURCE_FLAG_NONE);
    ID3D12Resource* buffer;
    if (FAILED(device->CreateCommittedResource(
            &ui::d3d12::util::kHeapPropertiesReadback,
            provider.GetHeapFlagCreateNotZeroed(), &buffer_desc,
            D3D12_RESOURCE_STATE_COPY_DEST, nullptr, IID_PPV_ARGS(&buffer)))) {
      XELOGE("Failed to create a {} MB readback buffer", size >> 20);
      return nullptr;
    }
    if (readback_buffer_ != nullptr) {
      readback_buffer_->Release();
    }
    readback_buffer_ = buffer;
    // Remember the new capacity so the buffer isn't recreated on every
    // request that fits in it.
    readback_buffer_size_ = size;
  }
  return readback_buffer_;
}

void D3D12CommandProcessor::WriteGammaRampSRV(
    bool is_pwl, D3D12_CPU_DESCRIPTOR_HANDLE handle) const {
  ID3D12Device* device = GetD3D12Provider().GetDevice();
  D3D12_SHADER_RESOURCE_VIEW_DESC desc;
  desc.ViewDimension = D3D12_SRV_DIMENSION_BUFFER;
  desc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;
  desc.Buffer.StructureByteStride = 0;
  desc.Buffer.Flags = D3D12_BUFFER_SRV_FLAG_NONE;
  if (is_pwl) {
    desc.Format = DXGI_FORMAT_R16G16_UINT;
    desc.Buffer.FirstElement = 256 * 4 / 4;
    desc.Buffer.NumElements = 128 * 3;
  } else {
    desc.Format = DXGI_FORMAT_R10G10B10A2_UNORM;
    desc.Buffer.FirstElement = 0;
    desc.Buffer.NumElements = 256;
  }
  device->CreateShaderResourceView(gamma_ramp_buffer_.Get(), &desc, handle);
}

}  // namespace d3d12
}  // namespace gpu
}  // namespace xe