From 0b1da0910670cd766dca38af2e0d7528b7d0ecef Mon Sep 17 00:00:00 2001 From: Vincent Lejeune Date: Mon, 18 Jan 2016 16:50:06 +0100 Subject: [PATCH 1/7] d3d12: Store vertex attributes as SRV and disable Input_layout. --- rpcs3/D3D12GSRender.vcxproj | 2 +- rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp | 99 ++++++++----------- .../D3D12/D3D12FragmentProgramDecompiler.cpp | 4 +- rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp | 94 +++++++++++------- rpcs3/Emu/RSX/D3D12/D3D12GSRender.h | 10 +- rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp | 1 - rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h | 44 +-------- .../D3D12/D3D12VertexProgramDecompiler.cpp | 19 ++-- 8 files changed, 123 insertions(+), 150 deletions(-) diff --git a/rpcs3/D3D12GSRender.vcxproj b/rpcs3/D3D12GSRender.vcxproj index a610917e18..4ef08e2967 100644 --- a/rpcs3/D3D12GSRender.vcxproj +++ b/rpcs3/D3D12GSRender.vcxproj @@ -63,7 +63,7 @@ - true + false diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp index 71cad3cf89..a95b377179 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp @@ -9,15 +9,18 @@ #include "../rsx_methods.h" -std::vector D3D12GSRender::upload_vertex_attributes(const std::vector > &vertex_ranges) +std::vector D3D12GSRender::upload_vertex_attributes( + const std::vector > &vertex_ranges, + gsl::not_null command_list) { - std::vector vertex_buffer_views; - - m_IASet.clear(); + std::vector vertex_buffer_views; + command_list->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(m_vertex_buffer_data.Get(), D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER, D3D12_RESOURCE_STATE_COPY_DEST)); size_t input_slot = 0; size_t vertex_count = 0; + size_t offset_in_vertex_buffers_buffer = 0; + for (const auto &pair : vertex_ranges) vertex_count += pair.second; @@ -34,9 +37,9 @@ std::vector D3D12GSRender::upload_vertex_attributes(co // Active vertex array const rsx::data_array_format_info &info = vertex_arrays_info[index]; - u32 element_size = rsx::get_vertex_type_size_on_host(info.type, info.size); - + size_t element_size = rsx::get_vertex_type_size_on_host(info.type, info.size); size_t buffer_size = element_size * vertex_count; + size_t heap_offset = m_buffer_data.alloc(buffer_size); void *mapped_buffer = m_buffer_data.map(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size)); @@ -47,25 +50,22 @@ std::vector D3D12GSRender::upload_vertex_attributes(co } m_buffer_data.unmap(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size)); - D3D12_VERTEX_BUFFER_VIEW vertex_buffer_view = - { - m_buffer_data.get_heap()->GetGPUVirtualAddress() + heap_offset, - (UINT)buffer_size, - (UINT)element_size + command_list->CopyBufferRegion(m_vertex_buffer_data.Get(), offset_in_vertex_buffers_buffer, m_buffer_data.get_heap(), heap_offset, buffer_size); + + D3D12_SHADER_RESOURCE_VIEW_DESC vertex_buffer_view = { + get_vertex_attribute_format(info.type, info.size), + D3D12_SRV_DIMENSION_BUFFER, + D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING }; + vertex_buffer_view.Buffer.FirstElement = offset_in_vertex_buffers_buffer / element_size; + vertex_buffer_view.Buffer.NumElements = buffer_size / element_size; vertex_buffer_views.push_back(vertex_buffer_view); + offset_in_vertex_buffers_buffer = (offset_in_vertex_buffers_buffer + buffer_size + 191) / 192; // 192 is multiple of 2, 4, 6, 8, 12, 16, 24, 32, 48, 64 + offset_in_vertex_buffers_buffer *= 192; + m_timers.m_buffer_upload_size += buffer_size; - D3D12_INPUT_ELEMENT_DESC IAElement = {}; - IAElement.SemanticName = "TEXCOORD"; - IAElement.SemanticIndex = (UINT)index; - IAElement.InputSlot = (UINT)input_slot++; - IAElement.Format = get_vertex_attribute_format(info.type, info.size); - IAElement.AlignedByteOffset = 0; - IAElement.InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA; - IAElement.InstanceDataStepRate = 0; - m_IASet.push_back(IAElement); } else if (register_vertex_info[index].size > 0) { @@ -74,34 +74,31 @@ std::vector D3D12GSRender::upload_vertex_attributes(co const std::vector &data = register_vertex_data[index]; - u32 element_size = rsx::get_vertex_type_size_on_host(info.type, info.size); - + size_t element_size = rsx::get_vertex_type_size_on_host(info.type, info.size); size_t buffer_size = data.size(); + size_t heap_offset = m_buffer_data.alloc(buffer_size); void *mapped_buffer = m_buffer_data.map(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size)); memcpy(mapped_buffer, data.data(), data.size()); m_buffer_data.unmap(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size)); - D3D12_VERTEX_BUFFER_VIEW vertex_buffer_view = { - m_buffer_data.get_heap()->GetGPUVirtualAddress() + heap_offset, - (UINT)buffer_size, - (UINT)element_size + command_list->CopyBufferRegion(m_vertex_buffer_data.Get(), offset_in_vertex_buffers_buffer, m_buffer_data.get_heap(), heap_offset, buffer_size); + + D3D12_SHADER_RESOURCE_VIEW_DESC vertex_buffer_view = { + get_vertex_attribute_format(info.type, info.size), + D3D12_SRV_DIMENSION_BUFFER, + D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING }; + vertex_buffer_view.Buffer.FirstElement = offset_in_vertex_buffers_buffer / element_size; + vertex_buffer_view.Buffer.NumElements = buffer_size / element_size; vertex_buffer_views.push_back(vertex_buffer_view); - D3D12_INPUT_ELEMENT_DESC IAElement = {}; - IAElement.SemanticName = "TEXCOORD"; - IAElement.SemanticIndex = (UINT)index; - IAElement.InputSlot = (UINT)input_slot++; - IAElement.Format = get_vertex_attribute_format(info.type, info.size); - IAElement.AlignedByteOffset = 0; - IAElement.InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_INSTANCE_DATA; - IAElement.InstanceDataStepRate = 1; - m_IASet.push_back(IAElement); + offset_in_vertex_buffers_buffer = (offset_in_vertex_buffers_buffer + buffer_size + 191) / 192; // 192 is multiple of 2, 4, 6, 8, 12, 16, 24, 32, 48, 64 + offset_in_vertex_buffers_buffer *= 192; } } - + command_list->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(m_vertex_buffer_data.Get(), D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER)); return vertex_buffer_views; } @@ -190,7 +187,7 @@ void D3D12GSRender::upload_and_bind_fragment_shader_constants(size_t descriptor_ std::tuple D3D12GSRender::upload_inlined_vertex_array() { UINT offset = 0; - m_IASet.clear(); + // Bind attributes for (int index = 0; index < rsx::limits::vertex_count; ++index) { @@ -199,16 +196,6 @@ std::tuple D3D12GSRender::upload_inlined_verte if (!info.size) // disabled continue; - D3D12_INPUT_ELEMENT_DESC IAElement = {}; - IAElement.SemanticName = "TEXCOORD"; - IAElement.SemanticIndex = (UINT)index; - IAElement.InputSlot = 0; - IAElement.Format = get_vertex_attribute_format(info.type, info.size); - IAElement.AlignedByteOffset = offset; - IAElement.InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA; - IAElement.InstanceDataStepRate = 0; - m_IASet.push_back(IAElement); - offset += rsx::get_vertex_type_size_on_host(info.type, info.size); } @@ -258,11 +245,11 @@ std::tuple D3D12GSRender::generate_index_buffer return std::make_tuple(index_buffer_view, index_count); } -std::tuple D3D12GSRender::upload_and_set_vertex_index_data(ID3D12GraphicsCommandList *command_list) +std::tuple> D3D12GSRender::upload_and_set_vertex_index_data(ID3D12GraphicsCommandList *command_list) { if (draw_command == Draw_command::draw_command_inlined_array) { - size_t vertex_count; +/* size_t vertex_count; D3D12_VERTEX_BUFFER_VIEW vertex_buffer_view; std::tie(vertex_buffer_view, vertex_count) = upload_inlined_vertex_array(); command_list->IASetVertexBuffers(0, (UINT)1, &vertex_buffer_view); @@ -274,28 +261,25 @@ std::tuple D3D12GSRender::upload_and_set_vertex_index_data(ID3D12G size_t index_count; std::tie(index_buffer_view, index_count) = generate_index_buffer_for_emulated_primitives_array({ { 0, (u32)vertex_count } }); command_list->IASetIndexBuffer(&index_buffer_view); - return std::make_tuple(true, index_count); + return std::make_tuple(true, index_count);*/ } if (draw_command == Draw_command::draw_command_array) { - const std::vector &vertex_buffer_views = upload_vertex_attributes(first_count_commands); - command_list->IASetVertexBuffers(0, (UINT)vertex_buffer_views.size(), vertex_buffer_views.data()); - if (is_primitive_native(draw_mode)) { // Index count size_t vertex_count = 0; for (const auto &pair : first_count_commands) vertex_count += pair.second; - return std::make_tuple(false, vertex_count); + return std::make_tuple(false, vertex_count, upload_vertex_attributes(first_count_commands, command_list)); } D3D12_INDEX_BUFFER_VIEW index_buffer_view; size_t index_count; std::tie(index_buffer_view, index_count) = generate_index_buffer_for_emulated_primitives_array(first_count_commands); command_list->IASetIndexBuffer(&index_buffer_view); - return std::make_tuple(true, index_count); + return std::make_tuple(true, index_count, upload_vertex_attributes(first_count_commands, command_list)); } assert(draw_command == Draw_command::draw_command_indexed); @@ -337,10 +321,7 @@ std::tuple D3D12GSRender::upload_and_set_vertex_index_data(ID3D12G m_timers.m_buffer_upload_size += buffer_size; command_list->IASetIndexBuffer(&index_buffer_view); - const std::vector &vertex_buffer_views = upload_vertex_attributes({ std::make_pair(0, max_index + 1) }); - command_list->IASetVertexBuffers(0, (UINT)vertex_buffer_views.size(), vertex_buffer_views.data()); - - return std::make_tuple(true, index_count); + return std::make_tuple(true, index_count, upload_vertex_attributes({ std::make_pair(0, max_index + 1) }, command_list)); } #endif diff --git a/rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.cpp b/rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.cpp index f40c238929..bdc6debbb5 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12FragmentProgramDecompiler.cpp @@ -123,7 +123,7 @@ void D3D12FragmentDecompiler::insertConstants(std::stringstream & OS) for (ParamItem PI : PT.items) { size_t textureIndex = atoi(PI.name.data() + 3); - OS << "Texture2D " << PI.name << " : register(t" << textureIndex << ");" << std::endl; + OS << "Texture2D " << PI.name << " : register(t" << textureIndex + 16 << ");" << std::endl; OS << "sampler " << PI.name << "sampler : register(s" << textureIndex << ");" << std::endl; } } @@ -132,7 +132,7 @@ void D3D12FragmentDecompiler::insertConstants(std::stringstream & OS) for (ParamItem PI : PT.items) { size_t textureIndex = atoi(PI.name.data() + 3); - OS << "TextureCube " << PI.name << " : register(t" << textureIndex << ");" << std::endl; + OS << "TextureCube " << PI.name << " : register(t" << textureIndex + 16 << ");" << std::endl; OS << "sampler " << PI.name << "sampler : register(s" << textureIndex << ");" << std::endl; } } diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 9d89cdf53f..86b2e23749 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -143,33 +143,38 @@ D3D12GSRender::D3D12GSRender() m_device->CreateRenderTargetView(m_backbuffer[1].Get(), &renter_target_view_desc, m_backbuffer_descriptor_heap[1]->GetCPUDescriptorHandleForHeapStart()); // Common root signatures - for (unsigned texture_count = 0; texture_count < 17; texture_count++) + for (int vertex_buffer_count = 1; vertex_buffer_count <= 16; vertex_buffer_count++) { - CD3DX12_DESCRIPTOR_RANGE descriptorRange[] = + for (unsigned texture_count = 0; texture_count < 17; texture_count++) { - // Scale Offset data - CD3DX12_DESCRIPTOR_RANGE(D3D12_DESCRIPTOR_RANGE_TYPE_CBV, 1, 0), - // Constants - CD3DX12_DESCRIPTOR_RANGE(D3D12_DESCRIPTOR_RANGE_TYPE_CBV, 2, 1), - // Textures - CD3DX12_DESCRIPTOR_RANGE(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, texture_count, 0), - // Samplers - CD3DX12_DESCRIPTOR_RANGE(D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, texture_count, 0), - }; - CD3DX12_ROOT_PARAMETER RP[2]; - RP[0].InitAsDescriptorTable((texture_count > 0) ? 3 : 2, &descriptorRange[0]); - RP[1].InitAsDescriptorTable(1, &descriptorRange[3]); + CD3DX12_DESCRIPTOR_RANGE descriptorRange[] = + { + // Vertex buffer + CD3DX12_DESCRIPTOR_RANGE(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, vertex_buffer_count, 0), + // Scale Offset data + CD3DX12_DESCRIPTOR_RANGE(D3D12_DESCRIPTOR_RANGE_TYPE_CBV, 1, 0), + // Constants + CD3DX12_DESCRIPTOR_RANGE(D3D12_DESCRIPTOR_RANGE_TYPE_CBV, 2, 1), + // Textures + CD3DX12_DESCRIPTOR_RANGE(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, texture_count, 16), + // Samplers + CD3DX12_DESCRIPTOR_RANGE(D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, texture_count, 0), + }; + CD3DX12_ROOT_PARAMETER RP[2]; + RP[0].InitAsDescriptorTable((texture_count > 0) ? 4 : 3, &descriptorRange[0]); + RP[1].InitAsDescriptorTable(1, &descriptorRange[4]); - Microsoft::WRL::ComPtr rootSignatureBlob; - Microsoft::WRL::ComPtr errorBlob; - CHECK_HRESULT(wrapD3D12SerializeRootSignature( - &CD3DX12_ROOT_SIGNATURE_DESC((texture_count > 0) ? 2 : 1, RP, 0, 0, D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT), - D3D_ROOT_SIGNATURE_VERSION_1, &rootSignatureBlob, &errorBlob)); + Microsoft::WRL::ComPtr rootSignatureBlob; + Microsoft::WRL::ComPtr errorBlob; + CHECK_HRESULT(wrapD3D12SerializeRootSignature( + &CD3DX12_ROOT_SIGNATURE_DESC((texture_count > 0) ? 2 : 1, RP, 0, 0), + D3D_ROOT_SIGNATURE_VERSION_1, &rootSignatureBlob, &errorBlob)); - m_device->CreateRootSignature(0, - rootSignatureBlob->GetBufferPointer(), - rootSignatureBlob->GetBufferSize(), - IID_PPV_ARGS(m_root_signatures[texture_count].GetAddressOf())); + m_device->CreateRootSignature(0, + rootSignatureBlob->GetBufferPointer(), + rootSignatureBlob->GetBufferSize(), + IID_PPV_ARGS(m_root_signatures[texture_count][vertex_buffer_count - 1].GetAddressOf())); + } } m_per_frame_storage[0].init(m_device.Get()); @@ -194,6 +199,17 @@ D3D12GSRender::D3D12GSRender() m_readback_resources.init(m_device.Get(), 1024 * 1024 * 128, D3D12_HEAP_TYPE_READBACK, D3D12_RESOURCE_STATE_COPY_DEST); m_buffer_data.init(m_device.Get(), 1024 * 1024 * 896, D3D12_HEAP_TYPE_UPLOAD, D3D12_RESOURCE_STATE_GENERIC_READ); + CHECK_HRESULT( + m_device->CreateCommittedResource( + &CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT), + D3D12_HEAP_FLAG_NONE, + &CD3DX12_RESOURCE_DESC::Buffer(1024 * 1024 * 16), + D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER, + nullptr, + IID_PPV_ARGS(m_vertex_buffer_data.GetAddressOf()) + ) + ); + if (rpcs3::config.rsx.d3d12.overlay.value()) init_d2d_structures(); } @@ -250,9 +266,14 @@ void D3D12GSRender::end() std::chrono::time_point vertex_index_duration_start = std::chrono::system_clock::now(); + size_t currentDescriptorIndex = get_current_resource_storage().descriptors_heap_index; + size_t vertex_count; bool indexed_draw; - std::tie(indexed_draw, vertex_count) = upload_and_set_vertex_index_data(get_current_resource_storage().command_list.Get()); + std::vector vertex_buffer_views; + std::tie(indexed_draw, vertex_count, vertex_buffer_views) = upload_and_set_vertex_index_data(get_current_resource_storage().command_list.Get()); + + size_t vertex_buffer_count = vertex_buffer_views.size(); std::chrono::time_point vertex_index_duration_end = std::chrono::system_clock::now(); m_timers.m_vertex_index_duration += std::chrono::duration_cast(vertex_index_duration_end - vertex_index_duration_start).count(); @@ -262,16 +283,23 @@ void D3D12GSRender::end() std::chrono::time_point program_load_end = std::chrono::system_clock::now(); m_timers.m_program_load_duration += std::chrono::duration_cast(program_load_end - program_load_start).count(); - get_current_resource_storage().command_list->SetGraphicsRootSignature(m_root_signatures[std::get<2>(m_current_pso)].Get()); + get_current_resource_storage().command_list->SetGraphicsRootSignature(m_root_signatures[std::get<2>(m_current_pso)][vertex_buffer_count - 1].Get()); get_current_resource_storage().command_list->OMSetStencilRef(rsx::method_registers[NV4097_SET_STENCIL_FUNC_REF]); std::chrono::time_point constants_duration_start = std::chrono::system_clock::now(); - size_t currentDescriptorIndex = get_current_resource_storage().descriptors_heap_index; + size_t offset = 0; + for (const auto view : vertex_buffer_views) + { + m_device->CreateShaderResourceView(m_vertex_buffer_data.Get(), &view, + CD3DX12_CPU_DESCRIPTOR_HANDLE(get_current_resource_storage().descriptors_heap->GetCPUDescriptorHandleForHeapStart()) + .Offset((INT)currentDescriptorIndex + offset++, g_descriptor_stride_srv_cbv_uav)); + } + // Constants - upload_and_bind_scale_offset_matrix(currentDescriptorIndex); - upload_and_bind_vertex_shader_constants(currentDescriptorIndex + 1); - upload_and_bind_fragment_shader_constants(currentDescriptorIndex + 2); + upload_and_bind_scale_offset_matrix(currentDescriptorIndex + vertex_buffer_count); + upload_and_bind_vertex_shader_constants(currentDescriptorIndex + 1 + vertex_buffer_count); + upload_and_bind_fragment_shader_constants(currentDescriptorIndex + 2 + vertex_buffer_count); std::chrono::time_point constants_duration_end = std::chrono::system_clock::now(); m_timers.m_constants_duration += std::chrono::duration_cast(constants_duration_end - constants_duration_start).count(); @@ -281,8 +309,7 @@ void D3D12GSRender::end() std::chrono::time_point texture_duration_start = std::chrono::system_clock::now(); if (std::get<2>(m_current_pso) > 0) { - upload_and_bind_textures(get_current_resource_storage().command_list.Get(), currentDescriptorIndex + 3, std::get<2>(m_current_pso) > 0); - + upload_and_bind_textures(get_current_resource_storage().command_list.Get(), currentDescriptorIndex + 3 + vertex_buffer_count, std::get<2>(m_current_pso) > 0); get_current_resource_storage().command_list->SetGraphicsRootDescriptorTable(0, CD3DX12_GPU_DESCRIPTOR_HANDLE(get_current_resource_storage().descriptors_heap->GetGPUDescriptorHandleForHeapStart()) @@ -294,16 +321,15 @@ void D3D12GSRender::end() ); get_current_resource_storage().current_sampler_index += std::get<2>(m_current_pso); - get_current_resource_storage().descriptors_heap_index += std::get<2>(m_current_pso) + 3; + get_current_resource_storage().descriptors_heap_index += std::get<2>(m_current_pso) + 3 + vertex_buffer_count; } else { - get_current_resource_storage().command_list->SetDescriptorHeaps(1, get_current_resource_storage().descriptors_heap.GetAddressOf()); get_current_resource_storage().command_list->SetGraphicsRootDescriptorTable(0, CD3DX12_GPU_DESCRIPTOR_HANDLE(get_current_resource_storage().descriptors_heap->GetGPUDescriptorHandleForHeapStart()) .Offset((INT)currentDescriptorIndex, g_descriptor_stride_srv_cbv_uav) ); - get_current_resource_storage().descriptors_heap_index += 3; + get_current_resource_storage().descriptors_heap_index += 3 + vertex_buffer_count; } std::chrono::time_point texture_duration_end = std::chrono::system_clock::now(); diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h index f9618600c4..433142f0b6 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h @@ -56,7 +56,7 @@ private: ComPtr m_backbuffer[2]; ComPtr m_backbuffer_descriptor_heap[2]; // m_rootSignatures[N] is RS with N texture/sample - ComPtr m_root_signatures[17]; + ComPtr m_root_signatures[17][16]; // indexed by [texture count][vertex count] // TODO: Use a tree structure to parse more efficiently data_cache m_texture_cache; @@ -115,11 +115,10 @@ private: // Textures, constants, index and vertex buffers storage data_heap m_buffer_data; data_heap m_readback_resources; + ComPtr m_vertex_buffer_data; render_targets m_rtts; - std::vector m_IASet; - INT g_descriptor_stride_srv_cbv_uav; INT g_descriptor_stride_dsv; INT g_descriptor_stride_rtv; @@ -150,14 +149,15 @@ private: * Non native primitive type are emulated by index buffers expansion. * Returns whether the draw call is indexed or not and the vertex count to draw. */ - std::tuple upload_and_set_vertex_index_data(ID3D12GraphicsCommandList *command_list); + std::tuple > upload_and_set_vertex_index_data(ID3D12GraphicsCommandList *command_list); /** * Upload all enabled vertex attributes for vertex in ranges described by vertex_ranges. * A range in vertex_range is a pair whose first element is the index of the beginning of the * range, and whose second element is the number of vertex in this range. */ - std::vector upload_vertex_attributes(const std::vector > &vertex_ranges); + std::vector upload_vertex_attributes(const std::vector > &vertex_ranges, + gsl::not_null command_list); std::tuple upload_inlined_vertex_array(); diff --git a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp index c8a26cba2a..92720cf196 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp @@ -251,7 +251,6 @@ void D3D12GSRender::load_program() for (unsigned i = 0; i < prop.numMRT; i++) prop.Blend.RenderTarget[i].RenderTargetWriteMask = mask; - prop.IASet = m_IASet; if (!!rsx::method_registers[NV4097_SET_RESTART_INDEX_ENABLE]) { Index_array_type index_type = to_index_array_type(rsx::method_registers[NV4097_SET_INDEX_ARRAY_DMA] >> 4); diff --git a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h index 831146f246..56a6d7e087 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h @@ -10,7 +10,6 @@ struct D3D12PipelineProperties D3D12_PRIMITIVE_TOPOLOGY_TYPE Topology; DXGI_FORMAT DepthStencilFormat; DXGI_FORMAT RenderTargetsFormat; - std::vector IASet; D3D12_BLEND_DESC Blend; unsigned numMRT : 3; D3D12_DEPTH_STENCIL_DESC DepthStencil; @@ -19,23 +18,6 @@ struct D3D12PipelineProperties bool operator==(const D3D12PipelineProperties &in) const { - if (IASet.size() != in.IASet.size()) - return false; - for (unsigned i = 0; i < IASet.size(); i++) - { - const D3D12_INPUT_ELEMENT_DESC &a = IASet[i], &b = in.IASet[i]; - if (a.AlignedByteOffset != b.AlignedByteOffset) - return false; - if (a.Format != b.Format) - return false; - if (a.InputSlot != b.InputSlot) - return false; - if (a.InstanceDataStepRate != b.InstanceDataStepRate) - return false; - if (a.SemanticIndex != b.SemanticIndex) - return false; - } - if (memcmp(&DepthStencil, &in.DepthStencil, sizeof(D3D12_DEPTH_STENCIL_DESC))) return false; if (memcmp(&Blend, &in.Blend, sizeof(D3D12_BLEND_DESC))) @@ -118,24 +100,6 @@ bool has_attribute(size_t attribute, const std::vector return false; } -static -std::vector completes_IA_desc(const std::vector &desc, const std::vector &inputs) -{ - std::vector result(desc); - for (size_t attribute : inputs) - { - if (has_attribute(attribute, desc)) - continue; - D3D12_INPUT_ELEMENT_DESC extra_ia_desc = {}; - extra_ia_desc.SemanticIndex = (UINT)attribute; - extra_ia_desc.Format = DXGI_FORMAT_R32G32B32A32_FLOAT; - extra_ia_desc.SemanticName = "TEXCOORD"; - extra_ia_desc.InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA; - result.push_back(extra_ia_desc); - } - return result; -} - struct D3D12Traits { using vertex_program_type = Shader; @@ -184,7 +148,7 @@ struct D3D12Traits static pipeline_storage_type build_pipeline( const vertex_program_type &vertexProgramData, const fragment_program_type &fragmentProgramData, const pipeline_properties &pipelineProperties, - ID3D12Device *device, gsl::span, 17> root_signatures) + ID3D12Device *device, gsl::span, 17, 16> root_signatures) { std::tuple, size_t> result = {}; D3D12_GRAPHICS_PIPELINE_STATE_DESC graphicPipelineStateDesc = {}; @@ -199,7 +163,7 @@ struct D3D12Traits graphicPipelineStateDesc.PS.BytecodeLength = fragmentProgramData.bytecode->GetBufferSize(); graphicPipelineStateDesc.PS.pShaderBytecode = fragmentProgramData.bytecode->GetBufferPointer(); - graphicPipelineStateDesc.pRootSignature = root_signatures[fragmentProgramData.m_textureCount].Get(); + graphicPipelineStateDesc.pRootSignature = root_signatures[fragmentProgramData.m_textureCount][vertexProgramData.vertex_shader_inputs.size() - 1].Get(); graphicPipelineStateDesc.BlendState = pipelineProperties.Blend; graphicPipelineStateDesc.DepthStencilState = pipelineProperties.DepthStencil; @@ -211,10 +175,6 @@ struct D3D12Traits graphicPipelineStateDesc.RTVFormats[i] = pipelineProperties.RenderTargetsFormat; graphicPipelineStateDesc.DSVFormat = pipelineProperties.DepthStencilFormat; - const std::vector &completed_IA_desc = completes_IA_desc(pipelineProperties.IASet, vertexProgramData.vertex_shader_inputs); - - graphicPipelineStateDesc.InputLayout.pInputElementDescs = completed_IA_desc.data(); - graphicPipelineStateDesc.InputLayout.NumElements = (UINT)completed_IA_desc.size(); graphicPipelineStateDesc.SampleDesc.Count = 1; graphicPipelineStateDesc.SampleMask = UINT_MAX; graphicPipelineStateDesc.NodeMask = 1; diff --git a/rpcs3/Emu/RSX/D3D12/D3D12VertexProgramDecompiler.cpp b/rpcs3/Emu/RSX/D3D12/D3D12VertexProgramDecompiler.cpp index 7d663411d6..9c51e69492 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12VertexProgramDecompiler.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12VertexProgramDecompiler.cpp @@ -54,18 +54,25 @@ void D3D12VertexProgramDecompiler::insertHeader(std::stringstream &OS) void D3D12VertexProgramDecompiler::insertInputs(std::stringstream & OS, const std::vector& inputs) { - OS << "struct VertexInput" << std::endl; - OS << "{" << std::endl; + std::vector> input_data; for (const ParamType PT : inputs) { for (const ParamItem &PI : PT.items) { - OS << " " << PT.type << " " << PI.name << ": TEXCOORD" << PI.location << ";" << std::endl; + input_data.push_back(std::make_tuple(PI.location, PI.name)); input_slots.push_back(PI.location); } } - OS << "};" << std::endl; + std::sort(input_data.begin(), input_data.end()); + + size_t t_register = 0; + for (const auto &attribute : input_data) + { + + OS << "Texture1D " << std::get<1>(attribute) << "_buffer : register(t" << t_register++ << ");\n"; + + } } void D3D12VertexProgramDecompiler::insertConstants(std::stringstream & OS, const std::vector & constants) @@ -142,7 +149,7 @@ static const reg_info reg_table[] = void D3D12VertexProgramDecompiler::insertMainStart(std::stringstream & OS) { - OS << "PixelInput main(VertexInput In)" << std::endl; + OS << "PixelInput main(uint vertex_id : SV_VertexID)" << std::endl; OS << "{" << std::endl; // Declare inside main function @@ -162,7 +169,7 @@ void D3D12VertexProgramDecompiler::insertMainStart(std::stringstream & OS) for (const ParamType PT : m_parr.params[PF_PARAM_IN]) { for (const ParamItem &PI : PT.items) - OS << " " << PT.type << " " << PI.name << " = In." << PI.name << ";" << std::endl; + OS << " " << PT.type << " " << PI.name << " = " << PI.name << "_buffer.Load(vertex_id);" << std::endl; } } From bb3950804f10e8a438eebaa1c35d65138f361632 Mon Sep 17 00:00:00 2001 From: Vincent Lejeune Date: Mon, 18 Jan 2016 20:10:55 +0100 Subject: [PATCH 2/7] rsx: Add vertex input and output in RSXVertexProgram. --- rpcs3/Emu/RSX/Common/ProgramStateCache.cpp | 20 ++++++++++++-------- rpcs3/Emu/RSX/Common/ProgramStateCache.h | 12 ++++++------ rpcs3/Emu/RSX/RSXVertexProgram.h | 16 ++++++++++++++++ 3 files changed, 34 insertions(+), 14 deletions(-) diff --git a/rpcs3/Emu/RSX/Common/ProgramStateCache.cpp b/rpcs3/Emu/RSX/Common/ProgramStateCache.cpp index 32b2ebb2d8..d98ef92d2b 100644 --- a/rpcs3/Emu/RSX/Common/ProgramStateCache.cpp +++ b/rpcs3/Emu/RSX/Common/ProgramStateCache.cpp @@ -3,14 +3,14 @@ using namespace program_hash_util; -size_t vertex_program_hash::operator()(const std::vector &program) const +size_t vertex_program_hash::operator()(const RSXVertexProgram &program) const { // 64-bit Fowler/Noll/Vo FNV-1a hash code size_t hash = 0xCBF29CE484222325ULL; - const qword *instbuffer = (const qword*)program.data(); + const qword *instbuffer = (const qword*)program.data.data(); size_t instIndex = 0; bool end = false; - for (unsigned i = 0; i < program.size() / 4; i++) + for (unsigned i = 0; i < program.data.size() / 4; i++) { const qword inst = instbuffer[instIndex]; hash ^= inst.dword[0]; @@ -22,13 +22,17 @@ size_t vertex_program_hash::operator()(const std::vector &program) const return hash; } -bool vertex_program_compare::operator()(const std::vector &binary1, const std::vector &binary2) const +bool vertex_program_compare::operator()(const RSXVertexProgram &binary1, const RSXVertexProgram &binary2) const { - if (binary1.size() != binary2.size()) return false; - const qword *instBuffer1 = (const qword*)binary1.data(); - const qword *instBuffer2 = (const qword*)binary2.data(); + if (binary1.output_mask != binary2.output_mask) + return false; + if (binary1.rsx_vertex_inputs != binary2.rsx_vertex_inputs) + return false; + if (binary1.data.size() != binary2.data.size()) return false; + const qword *instBuffer1 = (const qword*)binary1.data.data(); + const qword *instBuffer2 = (const qword*)binary2.data.data(); size_t instIndex = 0; - for (unsigned i = 0; i < binary1.size() / 4; i++) + for (unsigned i = 0; i < binary1.data.size() / 4; i++) { const qword& inst1 = instBuffer1[instIndex]; const qword& inst2 = instBuffer2[instIndex]; diff --git a/rpcs3/Emu/RSX/Common/ProgramStateCache.h b/rpcs3/Emu/RSX/Common/ProgramStateCache.h index a2cddff089..eef6eecb3d 100644 --- a/rpcs3/Emu/RSX/Common/ProgramStateCache.h +++ b/rpcs3/Emu/RSX/Common/ProgramStateCache.h @@ -23,12 +23,12 @@ namespace program_hash_util struct vertex_program_hash { - size_t operator()(const std::vector &program) const; + size_t operator()(const RSXVertexProgram &program) const; }; struct vertex_program_compare { - bool operator()(const std::vector &binary1, const std::vector &binary2) const; + bool operator()(const RSXVertexProgram &binary1, const RSXVertexProgram &binary2) const; }; struct fragment_program_utils @@ -75,7 +75,7 @@ class program_state_cache using vertex_program_type = typename backend_traits::vertex_program_type; using fragment_program_type = typename backend_traits::fragment_program_type; - using binary_to_vertex_program = std::unordered_map, vertex_program_type, program_hash_util::vertex_program_hash, program_hash_util::vertex_program_compare> ; + using binary_to_vertex_program = std::unordered_map ; using binary_to_fragment_program = std::unordered_map; @@ -115,13 +115,13 @@ private: /// bool here to inform that the program was preexisting. std::tuple search_vertex_program(const RSXVertexProgram& rsx_vp) { - const auto& I = m_vertex_shader_cache.find(rsx_vp.data); + const auto& I = m_vertex_shader_cache.find(rsx_vp); if (I != m_vertex_shader_cache.end()) { return std::forward_as_tuple(I->second, true); } LOG_NOTICE(RSX, "VP not found in buffer!"); - vertex_program_type& new_shader = m_vertex_shader_cache[rsx_vp.data]; + vertex_program_type& new_shader = m_vertex_shader_cache[rsx_vp]; backend_traits::recompile_vertex_program(rsx_vp, new_shader, m_next_id++); return std::forward_as_tuple(new_shader, false); @@ -151,7 +151,7 @@ public: const vertex_program_type& get_transform_program(const RSXVertexProgram& rsx_vp) const { - auto I = m_vertex_shader_cache.find(rsx_vp.data); + auto I = m_vertex_shader_cache.find(rsx_vp); if (I != m_vertex_shader_cache.end()) return I->second; throw new EXCEPTION("Trying to get unknow transform program"); diff --git a/rpcs3/Emu/RSX/RSXVertexProgram.h b/rpcs3/Emu/RSX/RSXVertexProgram.h index f16d811011..d90d29fba6 100644 --- a/rpcs3/Emu/RSX/RSXVertexProgram.h +++ b/rpcs3/Emu/RSX/RSXVertexProgram.h @@ -190,7 +190,23 @@ static const std::string rsx_vp_vec_op_names[] = "SEQ", "SFL", "SGT", "SLE", "SNE", "STR", "SSG", "NULL", "NULL", "TXL" }; +struct rsx_vertex_input +{ + u8 location; // between 0 and 15 + u8 size; // between 1 and 4 + u8 frequency; + bool is_modulo; // either modulo frequency or divide frequency + bool is_array; // false if "reg value" + + bool operator==(const rsx_vertex_input other) const + { + return location == other.location && size == other.size && frequency == other.frequency && is_modulo == other.is_modulo && is_array == other.is_array; + } +}; + struct RSXVertexProgram { std::vector data; + std::vector rsx_vertex_inputs; + u32 output_mask; }; From 3d765e26bf95ea644d812cf5f7a4e28b1c18718a Mon Sep 17 00:00:00 2001 From: Vincent Lejeune Date: Mon, 18 Jan 2016 20:21:00 +0100 Subject: [PATCH 3/7] more --- rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp | 36 ++++++++++++++++++++++ rpcs3/Emu/RSX/GL/GLGSRender.cpp | 36 ++++++++++++++++++++++ rpcs3/Emu/RSX/RSXVertexProgram.h | 2 +- 3 files changed, 73 insertions(+), 1 deletion(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp index 92720cf196..9b012d91a5 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp @@ -52,6 +52,42 @@ void D3D12GSRender::load_program() if (d3.end) break; } + vertex_program.output_mask = rsx::method_registers[NV4097_SET_VERTEX_ATTRIB_OUTPUT_MASK]; + + u32 input_mask = rsx::method_registers[NV4097_SET_VERTEX_ATTRIB_INPUT_MASK]; + u32 modulo_mask = rsx::method_registers[NV4097_SET_FREQUENCY_DIVIDER_OPERATION]; + + for (u8 index = 0; index < rsx::limits::vertex_count; ++index) + { + bool enabled = !!(input_mask & (1 << index)); + if (!enabled) + continue; + + if (vertex_arrays_info[index].size > 0) + { + vertex_program.rsx_vertex_inputs.push_back( + { + index, + vertex_arrays_info[index].size, + vertex_arrays_info[index].frequency, + !!((modulo_mask >> index) & 0x1), + true + } + ); + } + else if (register_vertex_info[index].size > 0) + { + vertex_program.rsx_vertex_inputs.push_back( + { + index, + register_vertex_info[index].size, + register_vertex_info[index].frequency, + !!((modulo_mask >> index) & 0x1), + false + } + ); + } + } u32 shader_program = rsx::method_registers[NV4097_SET_SHADER_PROGRAM]; fragment_program.offset = shader_program & ~0x3; diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index 94eb2b9d19..aa3175c349 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -753,6 +753,42 @@ bool GLGSRender::load_program() if (d3.end) break; } + vertex_program.output_mask = rsx::method_registers[NV4097_SET_VERTEX_ATTRIB_OUTPUT_MASK]; + + u32 input_mask = rsx::method_registers[NV4097_SET_VERTEX_ATTRIB_INPUT_MASK]; + u32 modulo_mask = rsx::method_registers[NV4097_SET_FREQUENCY_DIVIDER_OPERATION]; + + for (u8 index = 0; index < rsx::limits::vertex_count; ++index) + { + bool enabled = !!(input_mask & (1 << index)); + if (!enabled) + continue; + + if (vertex_arrays_info[index].size > 0) + { + vertex_program.rsx_vertex_inputs.push_back( + { + index, + vertex_arrays_info[index].size, + vertex_arrays_info[index].frequency, + !!((modulo_mask >> index) & 0x1), + true + } + ); + } + else if (register_vertex_info[index].size > 0) + { + vertex_program.rsx_vertex_inputs.push_back( + { + index, + register_vertex_info[index].size, + register_vertex_info[index].frequency, + !!((modulo_mask >> index) & 0x1), + false + } + ); + } + } RSXFragmentProgram fragment_program; u32 shader_program = rsx::method_registers[NV4097_SET_SHADER_PROGRAM]; diff --git a/rpcs3/Emu/RSX/RSXVertexProgram.h b/rpcs3/Emu/RSX/RSXVertexProgram.h index d90d29fba6..94f8198d8d 100644 --- a/rpcs3/Emu/RSX/RSXVertexProgram.h +++ b/rpcs3/Emu/RSX/RSXVertexProgram.h @@ -194,7 +194,7 @@ struct rsx_vertex_input { u8 location; // between 0 and 15 u8 size; // between 1 and 4 - u8 frequency; + u16 frequency; bool is_modulo; // either modulo frequency or divide frequency bool is_array; // false if "reg value" From d9f4b4b600930becd943a73b90071c541bf3b9dd Mon Sep 17 00:00:00 2001 From: Vincent Lejeune Date: Mon, 18 Jan 2016 20:25:48 +0100 Subject: [PATCH 4/7] more --- rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp | 2 +- rpcs3/Emu/RSX/GL/GLGSRender.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp index 9b012d91a5..e5f708cad6 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp @@ -56,7 +56,7 @@ void D3D12GSRender::load_program() u32 input_mask = rsx::method_registers[NV4097_SET_VERTEX_ATTRIB_INPUT_MASK]; u32 modulo_mask = rsx::method_registers[NV4097_SET_FREQUENCY_DIVIDER_OPERATION]; - + vertex_program.rsx_vertex_inputs.clear(); for (u8 index = 0; index < rsx::limits::vertex_count; ++index) { bool enabled = !!(input_mask & (1 << index)); diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index aa3175c349..0b67ca8838 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -757,7 +757,7 @@ bool GLGSRender::load_program() u32 input_mask = rsx::method_registers[NV4097_SET_VERTEX_ATTRIB_INPUT_MASK]; u32 modulo_mask = rsx::method_registers[NV4097_SET_FREQUENCY_DIVIDER_OPERATION]; - + vertex_program.rsx_vertex_inputs.clear(); for (u8 index = 0; index < rsx::limits::vertex_count; ++index) { bool enabled = !!(input_mask & (1 << index)); From 73a4555c9dcfa2a58924f69bb2a74b556b7b1469 Mon Sep 17 00:00:00 2001 From: Vincent Lejeune Date: Mon, 18 Jan 2016 20:42:20 +0100 Subject: [PATCH 5/7] continue working on input less --- rpcs3/Emu/RSX/D3D12/D3D12GSRender.h | 2 +- rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h | 16 ++++++++-------- .../RSX/D3D12/D3D12VertexProgramDecompiler.cpp | 17 ++++++++++++++--- .../RSX/D3D12/D3D12VertexProgramDecompiler.h | 3 ++- 4 files changed, 25 insertions(+), 13 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h index 433142f0b6..feb9054079 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h @@ -67,7 +67,7 @@ private: RSXVertexProgram vertex_program; RSXFragmentProgram fragment_program; PipelineStateObjectCache m_pso_cache; - std::tuple, std::vector, size_t> m_current_pso; + std::tuple, size_t, size_t> m_current_pso; struct { diff --git a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h index 56a6d7e087..966545f0b8 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h @@ -75,7 +75,7 @@ public: ComPtr bytecode; // For debugging std::string content; - std::vector vertex_shader_inputs; + size_t vertex_shader_input_count; std::vector FragmentConstantOffsetCache; size_t m_textureCount; @@ -104,7 +104,7 @@ struct D3D12Traits { using vertex_program_type = Shader; using fragment_program_type = Shader; - using pipeline_storage_type = std::tuple, std::vector, size_t>; + using pipeline_storage_type = std::tuple, size_t, size_t>; using pipeline_properties = D3D12PipelineProperties; static @@ -140,15 +140,15 @@ struct D3D12Traits D3D12VertexProgramDecompiler VS(RSXVP); std::string shaderCode = VS.Decompile(); vertexProgramData.Compile(shaderCode, Shader::SHADER_TYPE::SHADER_TYPE_VERTEX); - vertexProgramData.vertex_shader_inputs = VS.input_slots; + vertexProgramData.vertex_shader_input_count = RSXVP.rsx_vertex_inputs.size(); fs::file(fs::get_config_dir() + "VertexProgram" + std::to_string(ID) + ".hlsl", fom::rewrite).write(shaderCode); vertexProgramData.id = (u32)ID; } static - pipeline_storage_type build_pipeline( - const vertex_program_type &vertexProgramData, const fragment_program_type &fragmentProgramData, const pipeline_properties &pipelineProperties, - ID3D12Device *device, gsl::span, 17, 16> root_signatures) + pipeline_storage_type build_pipeline( + const vertex_program_type &vertexProgramData, const fragment_program_type &fragmentProgramData, const pipeline_properties &pipelineProperties, + ID3D12Device *device, gsl::span, 17, 16> root_signatures) { std::tuple, size_t> result = {}; D3D12_GRAPHICS_PIPELINE_STATE_DESC graphicPipelineStateDesc = {}; @@ -163,7 +163,7 @@ struct D3D12Traits graphicPipelineStateDesc.PS.BytecodeLength = fragmentProgramData.bytecode->GetBufferSize(); graphicPipelineStateDesc.PS.pShaderBytecode = fragmentProgramData.bytecode->GetBufferPointer(); - graphicPipelineStateDesc.pRootSignature = root_signatures[fragmentProgramData.m_textureCount][vertexProgramData.vertex_shader_inputs.size() - 1].Get(); + graphicPipelineStateDesc.pRootSignature = root_signatures[fragmentProgramData.m_textureCount][vertexProgramData.vertex_shader_input_count - 1].Get(); graphicPipelineStateDesc.BlendState = pipelineProperties.Blend; graphicPipelineStateDesc.DepthStencilState = pipelineProperties.DepthStencil; @@ -186,7 +186,7 @@ struct D3D12Traits std::wstring name = L"PSO_" + std::to_wstring(vertexProgramData.id) + L"_" + std::to_wstring(fragmentProgramData.id); pso->SetName(name.c_str()); - return std::make_tuple(pso, vertexProgramData.vertex_shader_inputs, fragmentProgramData.m_textureCount); + return std::make_tuple(pso, vertexProgramData.vertex_shader_input_count, fragmentProgramData.m_textureCount); } }; diff --git a/rpcs3/Emu/RSX/D3D12/D3D12VertexProgramDecompiler.cpp b/rpcs3/Emu/RSX/D3D12/D3D12VertexProgramDecompiler.cpp index 9c51e69492..d91d6e105f 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12VertexProgramDecompiler.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12VertexProgramDecompiler.cpp @@ -60,7 +60,6 @@ void D3D12VertexProgramDecompiler::insertInputs(std::stringstream & OS, const st for (const ParamItem &PI : PT.items) { input_data.push_back(std::make_tuple(PI.location, PI.name)); - input_slots.push_back(PI.location); } } @@ -169,7 +168,19 @@ void D3D12VertexProgramDecompiler::insertMainStart(std::stringstream & OS) for (const ParamType PT : m_parr.params[PF_PARAM_IN]) { for (const ParamItem &PI : PT.items) - OS << " " << PT.type << " " << PI.name << " = " << PI.name << "_buffer.Load(vertex_id);" << std::endl; + { + for (const auto &real_input : rsx_vertex_program.rsx_vertex_inputs) + { + if (real_input.location != PI.location) + continue; + if (!real_input.is_array) + { + OS << " " << PT.type << " " << PI.name << " = " << PI.name << "_buffer.Load(0);\n"; + continue; + } + OS << " " << PT.type << " " << PI.name << " = " << PI.name << "_buffer.Load(vertex_id);\n"; + } + } } } @@ -189,7 +200,7 @@ void D3D12VertexProgramDecompiler::insertMainEnd(std::stringstream & OS) } D3D12VertexProgramDecompiler::D3D12VertexProgramDecompiler(const RSXVertexProgram &prog) : - VertexProgramDecompiler(prog) + VertexProgramDecompiler(prog), rsx_vertex_program(prog) { } #endif diff --git a/rpcs3/Emu/RSX/D3D12/D3D12VertexProgramDecompiler.h b/rpcs3/Emu/RSX/D3D12/D3D12VertexProgramDecompiler.h index fd5a55a7a8..01161b37c8 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12VertexProgramDecompiler.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12VertexProgramDecompiler.h @@ -18,7 +18,8 @@ protected: virtual void insertOutputs(std::stringstream &OS, const std::vector &outputs); virtual void insertMainStart(std::stringstream &OS); virtual void insertMainEnd(std::stringstream &OS); + + const RSXVertexProgram &rsx_vertex_program; public: - std::vector input_slots; D3D12VertexProgramDecompiler(const RSXVertexProgram &prog); }; From 92c17c35ec795afdafbfe4a2543a19ff6c6f3db2 Mon Sep 17 00:00:00 2001 From: Vincent Lejeune Date: Mon, 18 Jan 2016 20:57:07 +0100 Subject: [PATCH 6/7] more --- .../D3D12/D3D12VertexProgramDecompiler.cpp | 52 ++++++++++++++----- 1 file changed, 38 insertions(+), 14 deletions(-) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12VertexProgramDecompiler.cpp b/rpcs3/Emu/RSX/D3D12/D3D12VertexProgramDecompiler.cpp index d91d6e105f..3f87cfbd2f 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12VertexProgramDecompiler.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12VertexProgramDecompiler.cpp @@ -52,6 +52,21 @@ void D3D12VertexProgramDecompiler::insertHeader(std::stringstream &OS) OS << "};" << std::endl; } +namespace +{ + bool declare_input(std::stringstream & OS, const std::tuple &attribute, const std::vector &inputs, size_t reg) + { + for (const auto &real_input : inputs) + { + if (static_cast(real_input.location) != std::get<0>(attribute)) + continue; + OS << "Texture1D " << std::get<1>(attribute) << "_buffer : register(t" << reg++ << ");\n"; + return true; + } + return false; + } +} + void D3D12VertexProgramDecompiler::insertInputs(std::stringstream & OS, const std::vector& inputs) { std::vector> input_data; @@ -68,9 +83,8 @@ void D3D12VertexProgramDecompiler::insertInputs(std::stringstream & OS, const st size_t t_register = 0; for (const auto &attribute : input_data) { - - OS << "Texture1D " << std::get<1>(attribute) << "_buffer : register(t" << t_register++ << ");\n"; - + if (declare_input(OS, attribute, rsx_vertex_program.rsx_vertex_inputs, t_register)) + t_register++; } } @@ -146,6 +160,26 @@ static const reg_info reg_table[] = { "tc8", true, "dst_reg15", "", false }, }; +namespace +{ + void add_input(std::stringstream & OS, const ParamItem &PI, const std::vector &inputs) + { + for (const auto &real_input : inputs) + { + if (real_input.location != PI.location) + continue; + if (!real_input.is_array) + { + OS << " float4 " << PI.name << " = " << PI.name << "_buffer.Load(0);\n"; + return; + } + OS << " float4 " << PI.name << " = " << PI.name << "_buffer.Load(vertex_id);\n"; + return; + } + OS << " float4 " << PI.name << " = float4(0., 0., 0., 1.);\n"; + } +} + void D3D12VertexProgramDecompiler::insertMainStart(std::stringstream & OS) { OS << "PixelInput main(uint vertex_id : SV_VertexID)" << std::endl; @@ -169,17 +203,7 @@ void D3D12VertexProgramDecompiler::insertMainStart(std::stringstream & OS) { for (const ParamItem &PI : PT.items) { - for (const auto &real_input : rsx_vertex_program.rsx_vertex_inputs) - { - if (real_input.location != PI.location) - continue; - if (!real_input.is_array) - { - OS << " " << PT.type << " " << PI.name << " = " << PI.name << "_buffer.Load(0);\n"; - continue; - } - OS << " " << PT.type << " " << PI.name << " = " << PI.name << "_buffer.Load(vertex_id);\n"; - } + add_input(OS, PI, rsx_vertex_program.rsx_vertex_inputs); } } } From 233689a6bcf316a14a36c330d1a7df15c2c34f04 Mon Sep 17 00:00:00 2001 From: Vincent Lejeune Date: Mon, 18 Jan 2016 23:57:56 +0100 Subject: [PATCH 7/7] enable instancing --- rpcs3/Emu/RSX/D3D12/D3D12VertexProgramDecompiler.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/rpcs3/Emu/RSX/D3D12/D3D12VertexProgramDecompiler.cpp b/rpcs3/Emu/RSX/D3D12/D3D12VertexProgramDecompiler.cpp index 3f87cfbd2f..a56b5f4212 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12VertexProgramDecompiler.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12VertexProgramDecompiler.cpp @@ -173,6 +173,16 @@ namespace OS << " float4 " << PI.name << " = " << PI.name << "_buffer.Load(0);\n"; return; } + if (real_input.frequency > 1) + { + if (real_input.is_modulo) + { + OS << " float4 " << PI.name << " = " << PI.name << "_buffer.Load(vertex_id % " << real_input.frequency << ");\n"; + return; + } + OS << " float4 " << PI.name << " = " << PI.name << "_buffer.Load(vertex_id / " << real_input.frequency << ");\n"; + return; + } OS << " float4 " << PI.name << " = " << PI.name << "_buffer.Load(vertex_id);\n"; return; }