From d4334312f3790c0194e9a743d2e5d4bd446f0bc1 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Tue, 11 Sep 2018 12:37:47 +0300 Subject: [PATCH] [D3D12] DXBC samplers --- .../gpu/d3d12/d3d12_command_processor.cc | 27 +++++--- src/xenia/gpu/d3d12/d3d12_shader.cc | 32 +++++---- src/xenia/gpu/d3d12/d3d12_shader.h | 42 ++++++++---- src/xenia/gpu/d3d12/pipeline_cache.cc | 13 ++-- src/xenia/gpu/d3d12/texture_cache.cc | 46 ++++++++----- src/xenia/gpu/d3d12/texture_cache.h | 4 +- src/xenia/gpu/dxbc_shader_translator.cc | 67 ++++++++++++++++++- src/xenia/gpu/dxbc_shader_translator.h | 22 ++++++ 8 files changed, 188 insertions(+), 65 deletions(-) diff --git a/src/xenia/gpu/d3d12/d3d12_command_processor.cc b/src/xenia/gpu/d3d12/d3d12_command_processor.cc index 96b7fb5d0..8f1a3a257 100644 --- a/src/xenia/gpu/d3d12/d3d12_command_processor.cc +++ b/src/xenia/gpu/d3d12/d3d12_command_processor.cc @@ -129,11 +129,11 @@ ID3D12RootSignature* D3D12CommandProcessor::GetRootSignature( uint32_t pixel_texture_count = 0, pixel_sampler_count = 0; if (pixel_shader != nullptr) { pixel_shader->GetTextureSRVs(pixel_texture_count); - pixel_shader->GetSamplerFetchConstants(pixel_sampler_count); + pixel_shader->GetSamplerBindings(pixel_sampler_count); } uint32_t vertex_texture_count, vertex_sampler_count; vertex_shader->GetTextureSRVs(vertex_texture_count); - vertex_shader->GetSamplerFetchConstants(vertex_sampler_count); + vertex_shader->GetSamplerBindings(vertex_sampler_count); // Max 96 textures (if all kinds of tfetch instructions are used for all fetch // registers) and 32 samplers (one sampler per used fetch), but different // shader stages have different texture sets. @@ -319,11 +319,11 @@ uint32_t D3D12CommandProcessor::GetRootExtraParameterIndices( uint32_t pixel_texture_count = 0, pixel_sampler_count = 0; if (pixel_shader != nullptr) { pixel_shader->GetTextureSRVs(pixel_texture_count); - pixel_shader->GetSamplerFetchConstants(pixel_sampler_count); + pixel_shader->GetSamplerBindings(pixel_sampler_count); } uint32_t vertex_texture_count, vertex_sampler_count; vertex_shader->GetTextureSRVs(vertex_texture_count); - vertex_shader->GetSamplerFetchConstants(vertex_sampler_count); + vertex_shader->GetSamplerBindings(vertex_sampler_count); uint32_t index = kRootParameter_Count_Base; if (pixel_texture_count != 0) { @@ -1525,11 +1525,10 @@ bool D3D12CommandProcessor::UpdateBindings( // Get used textures and samplers. uint32_t pixel_texture_count, pixel_sampler_count; const D3D12Shader::TextureSRV* pixel_textures; - const uint32_t* pixel_samplers; + const D3D12Shader::SamplerBinding* pixel_samplers; if (pixel_shader != nullptr) { pixel_textures = pixel_shader->GetTextureSRVs(pixel_texture_count); - pixel_samplers = - pixel_shader->GetSamplerFetchConstants(pixel_sampler_count); + pixel_samplers = pixel_shader->GetSamplerBindings(pixel_sampler_count); } else { pixel_textures = nullptr; pixel_texture_count = 0; @@ -1539,8 +1538,8 @@ bool D3D12CommandProcessor::UpdateBindings( uint32_t vertex_texture_count, vertex_sampler_count; const D3D12Shader::TextureSRV* vertex_textures = vertex_shader->GetTextureSRVs(vertex_texture_count); - const uint32_t* vertex_samplers = - vertex_shader->GetSamplerFetchConstants(vertex_sampler_count); + const D3D12Shader::SamplerBinding* vertex_samplers = + vertex_shader->GetSamplerBindings(vertex_sampler_count); uint32_t texture_count = pixel_texture_count + vertex_texture_count; uint32_t sampler_count = pixel_sampler_count + vertex_sampler_count; @@ -1785,7 +1784,10 @@ bool D3D12CommandProcessor::UpdateBindings( RootExtraParameterIndices::kUnavailable); gpu_handle_pixel_samplers_ = sampler_gpu_handle; for (uint32_t i = 0; i < pixel_sampler_count; ++i) { - texture_cache_->WriteSampler(pixel_samplers[i], sampler_cpu_handle); + const D3D12Shader::SamplerBinding& sampler = pixel_samplers[i]; + texture_cache_->WriteSampler(sampler.fetch_constant, sampler.mag_filter, + sampler.min_filter, sampler.mip_filter, + sampler.aniso_filter, sampler_cpu_handle); sampler_cpu_handle.ptr += descriptor_size_sampler; sampler_gpu_handle.ptr += descriptor_size_sampler; } @@ -1797,7 +1799,10 @@ bool D3D12CommandProcessor::UpdateBindings( RootExtraParameterIndices::kUnavailable); gpu_handle_vertex_samplers_ = sampler_gpu_handle; for (uint32_t i = 0; i < vertex_sampler_count; ++i) { - texture_cache_->WriteSampler(vertex_samplers[i], sampler_cpu_handle); + const D3D12Shader::SamplerBinding& sampler = vertex_samplers[i]; + texture_cache_->WriteSampler(sampler.fetch_constant, sampler.mag_filter, + sampler.min_filter, sampler.mip_filter, + sampler.aniso_filter, sampler_cpu_handle); sampler_cpu_handle.ptr += descriptor_size_sampler; sampler_gpu_handle.ptr += descriptor_size_sampler; } diff --git a/src/xenia/gpu/d3d12/d3d12_shader.cc b/src/xenia/gpu/d3d12/d3d12_shader.cc index 6d937d1a1..d99b664b6 100644 --- a/src/xenia/gpu/d3d12/d3d12_shader.cc +++ b/src/xenia/gpu/d3d12/d3d12_shader.cc @@ -29,27 +29,33 @@ D3D12Shader::~D3D12Shader() { void D3D12Shader::SetTexturesAndSamplers( const DxbcShaderTranslator::TextureSRV* texture_srvs, - uint32_t texture_srv_count, const uint32_t* sampler_fetch_constants, - uint32_t sampler_count) { + uint32_t texture_srv_count, + const DxbcShaderTranslator::SamplerBinding* sampler_bindings, + uint32_t sampler_binding_count) { + texture_srvs_.clear(); + texture_srvs_.reserve(texture_srv_count); used_texture_mask_ = 0; for (uint32_t i = 0; i < texture_srv_count; ++i) { - TextureSRV& srv = texture_srvs_[i]; + TextureSRV srv; const DxbcShaderTranslator::TextureSRV& translator_srv = texture_srvs[i]; srv.fetch_constant = translator_srv.fetch_constant; srv.dimension = translator_srv.dimension; + texture_srvs_.push_back(srv); used_texture_mask_ |= 1u << translator_srv.fetch_constant; } - texture_srv_count_ = texture_srv_count; -#if 0 - // If there's a texture, there's a sampler for it. - used_texture_mask_ = 0; - for (uint32_t i = 0; i < sampler_count; ++i) { - uint32_t sampler_fetch_constant = sampler_fetch_constants[i]; - sampler_fetch_constants_[i] = sampler_fetch_constant; - used_texture_mask_ |= 1u << sampler_fetch_constant; + sampler_bindings_.clear(); + sampler_bindings_.reserve(sampler_binding_count); + for (uint32_t i = 0; i < texture_srv_count; ++i) { + SamplerBinding sampler; + const DxbcShaderTranslator::SamplerBinding& translator_sampler = + sampler_bindings[i]; + sampler.fetch_constant = translator_sampler.fetch_constant; + sampler.mag_filter = translator_sampler.mag_filter; + sampler.min_filter = translator_sampler.min_filter; + sampler.mip_filter = translator_sampler.mip_filter; + sampler.aniso_filter = translator_sampler.aniso_filter; + sampler_bindings_.push_back(sampler); } - sampler_count_ = sampler_count; -#endif } bool D3D12Shader::DisassembleDXBC() { diff --git a/src/xenia/gpu/d3d12/d3d12_shader.h b/src/xenia/gpu/d3d12/d3d12_shader.h index ceed8a55e..e195ba900 100644 --- a/src/xenia/gpu/d3d12/d3d12_shader.h +++ b/src/xenia/gpu/d3d12/d3d12_shader.h @@ -10,6 +10,8 @@ #ifndef XENIA_GPU_D3D12_D3D12_SHADER_H_ #define XENIA_GPU_D3D12_D3D12_SHADER_H_ +#include + #include "xenia/gpu/dxbc_shader_translator.h" #include "xenia/gpu/shader.h" #include "xenia/ui/d3d12/d3d12_api.h" @@ -26,34 +28,48 @@ class D3D12Shader : public Shader { void SetTexturesAndSamplers( const DxbcShaderTranslator::TextureSRV* texture_srvs, - uint32_t texture_srv_count, const uint32_t* sampler_fetch_constants, - uint32_t sampler_count); + uint32_t texture_srv_count, + const DxbcShaderTranslator::SamplerBinding* sampler_bindings, + uint32_t sampler_binding_count); bool DisassembleDXBC(); + static constexpr uint32_t kMaxTextureSRVIndexBits = + DxbcShaderTranslator::kMaxTextureSRVIndexBits; + static constexpr uint32_t kMaxTextureSRVs = + DxbcShaderTranslator::kMaxTextureSRVs; struct TextureSRV { uint32_t fetch_constant; TextureDimension dimension; }; const TextureSRV* GetTextureSRVs(uint32_t& count_out) const { - count_out = texture_srv_count_; - return texture_srvs_; - } - const uint32_t* GetSamplerFetchConstants(uint32_t& count_out) const { - count_out = sampler_count_; - return sampler_fetch_constants_; + count_out = uint32_t(texture_srvs_.size()); + return texture_srvs_.data(); } const uint32_t GetUsedTextureMask() const { return used_texture_mask_; } + static constexpr uint32_t kMaxSamplerBindingIndexBits = + DxbcShaderTranslator::kMaxSamplerBindingIndexBits; + static constexpr uint32_t kMaxSamplerBindings = + DxbcShaderTranslator::kMaxSamplerBindings; + struct SamplerBinding { + uint32_t fetch_constant; + TextureFilter mag_filter; + TextureFilter min_filter; + TextureFilter mip_filter; + AnisoFilter aniso_filter; + }; + const SamplerBinding* GetSamplerBindings(uint32_t& count_out) const { + count_out = uint32_t(sampler_bindings_.size()); + return sampler_bindings_.data(); + } + private: ID3DBlob* blob_ = nullptr; - // Up to 32 2D array textures, 32 3D textures and 32 cube textures. - TextureSRV texture_srvs_[96]; - uint32_t texture_srv_count_ = 0; - uint32_t sampler_fetch_constants_[32]; - uint32_t sampler_count_ = 0; + std::vector texture_srvs_; uint32_t used_texture_mask_ = 0; + std::vector sampler_bindings_; }; } // namespace d3d12 diff --git a/src/xenia/gpu/d3d12/pipeline_cache.cc b/src/xenia/gpu/d3d12/pipeline_cache.cc index f35d824cc..e136fd250 100644 --- a/src/xenia/gpu/d3d12/pipeline_cache.cc +++ b/src/xenia/gpu/d3d12/pipeline_cache.cc @@ -196,17 +196,14 @@ bool PipelineCache::TranslateShader(D3D12Shader* shader, return false; } - // TODO(Triang3l): Re-enable this when the DXBC shader translators supports - // textures. uint32_t texture_srv_count; const DxbcShaderTranslator::TextureSRV* texture_srvs = shader_translator_->GetTextureSRVs(texture_srv_count); -#if 0 - uint32_t sampler_count; - const uint32_t* sampler_fetch_constants = - shader_translator_->GetSamplerFetchConstants(sampler_count); -#endif - shader->SetTexturesAndSamplers(texture_srvs, texture_srv_count, nullptr, 0); + uint32_t sampler_binding_count; + const DxbcShaderTranslator::SamplerBinding* sampler_bindings = + shader_translator_->GetSamplerBindings(sampler_binding_count); + shader->SetTexturesAndSamplers(texture_srvs, texture_srv_count, + sampler_bindings, sampler_binding_count); if (shader->is_valid()) { XELOGGPU("Generated %s shader (%db) - hash %.16" PRIX64 ":\n%s\n", diff --git a/src/xenia/gpu/d3d12/texture_cache.cc b/src/xenia/gpu/d3d12/texture_cache.cc index d2c22eacb..74bff2937 100644 --- a/src/xenia/gpu/d3d12/texture_cache.cc +++ b/src/xenia/gpu/d3d12/texture_cache.cc @@ -445,34 +445,46 @@ void TextureCache::WriteTextureSRV(uint32_t fetch_constant, } void TextureCache::WriteSampler(uint32_t fetch_constant, + TextureFilter mag_filter, + TextureFilter min_filter, + TextureFilter mip_filter, + AnisoFilter aniso_filter, D3D12_CPU_DESCRIPTOR_HANDLE handle) { auto& regs = *register_file_; uint32_t r = XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + fetch_constant * 6; auto group = reinterpret_cast(®s.values[r]); auto& fetch = group->texture_fetch; - // TODO(Triang3l): Fetch shader instruction overrides. + if (mag_filter == TextureFilter::kUseFetchConst) { + mag_filter = TextureFilter(fetch.mag_filter); + } + if (min_filter == TextureFilter::kUseFetchConst) { + min_filter = TextureFilter(fetch.min_filter); + } + if (mip_filter == TextureFilter::kUseFetchConst) { + mip_filter = TextureFilter(fetch.mip_filter); + } + if (aniso_filter == AnisoFilter::kUseFetchConst) { + aniso_filter = AnisoFilter(fetch.aniso_filter); + } D3D12_SAMPLER_DESC desc; if (fetch.aniso_filter) { desc.Filter = D3D12_FILTER_ANISOTROPIC; - desc.MaxAnisotropy = std::min(1u << (fetch.aniso_filter - 1), 16u); + desc.MaxAnisotropy = std::min(1u << (uint32_t(aniso_filter) - 1), 16u); } else { - D3D12_FILTER_TYPE filter_min = - TextureFilter(fetch.min_filter) == TextureFilter::kLinear - ? D3D12_FILTER_TYPE_LINEAR - : D3D12_FILTER_TYPE_POINT; - D3D12_FILTER_TYPE filter_mag = - TextureFilter(fetch.mag_filter) == TextureFilter::kLinear - ? D3D12_FILTER_TYPE_LINEAR - : D3D12_FILTER_TYPE_POINT; - D3D12_FILTER_TYPE filter_mip = - TextureFilter(fetch.mip_filter) == TextureFilter::kLinear - ? D3D12_FILTER_TYPE_LINEAR - : D3D12_FILTER_TYPE_POINT; + D3D12_FILTER_TYPE d3d_filter_min = min_filter == TextureFilter::kLinear + ? D3D12_FILTER_TYPE_LINEAR + : D3D12_FILTER_TYPE_POINT; + D3D12_FILTER_TYPE d3d_filter_mag = mag_filter == TextureFilter::kLinear + ? D3D12_FILTER_TYPE_LINEAR + : D3D12_FILTER_TYPE_POINT; + D3D12_FILTER_TYPE d3d_filter_mip = mip_filter == TextureFilter::kLinear + ? D3D12_FILTER_TYPE_LINEAR + : D3D12_FILTER_TYPE_POINT; // TODO(Triang3l): Investigate mip_filter TextureFilter::kBaseMap. - desc.Filter = - D3D12_ENCODE_BASIC_FILTER(filter_min, filter_mag, filter_mip, - D3D12_FILTER_REDUCTION_TYPE_STANDARD); + desc.Filter = D3D12_ENCODE_BASIC_FILTER( + d3d_filter_min, d3d_filter_mag, d3d_filter_mip, + D3D12_FILTER_REDUCTION_TYPE_STANDARD); desc.MaxAnisotropy = 1; } // FIXME(Triang3l): Halfway and mirror clamp to border aren't mapped properly. diff --git a/src/xenia/gpu/d3d12/texture_cache.h b/src/xenia/gpu/d3d12/texture_cache.h index b056ce98e..fd9497476 100644 --- a/src/xenia/gpu/d3d12/texture_cache.h +++ b/src/xenia/gpu/d3d12/texture_cache.h @@ -76,7 +76,9 @@ class TextureCache { void WriteTextureSRV(uint32_t fetch_constant, TextureDimension shader_dimension, D3D12_CPU_DESCRIPTOR_HANDLE handle); - void WriteSampler(uint32_t fetch_constant, + void WriteSampler(uint32_t fetch_constant, TextureFilter mag_filter, + TextureFilter min_filter, TextureFilter mip_filter, + AnisoFilter aniso_filter, D3D12_CPU_DESCRIPTOR_HANDLE handle); static DXGI_FORMAT GetResolveDXGIFormat(TextureFormat format); diff --git a/src/xenia/gpu/dxbc_shader_translator.cc b/src/xenia/gpu/dxbc_shader_translator.cc index 442862ba9..ac252d47a 100644 --- a/src/xenia/gpu/dxbc_shader_translator.cc +++ b/src/xenia/gpu/dxbc_shader_translator.cc @@ -13,6 +13,8 @@ #include #include +#include +#include #include "third_party/dxbc/DXBCChecksum.h" #include "third_party/dxbc/d3d12TokenizedProgramFormat.hpp" @@ -2870,6 +2872,10 @@ uint32_t DxbcShaderTranslator::FindOrAddTextureSRV(uint32_t fetch_constant, return 1 + i; } } + if (texture_srvs_.size() >= kMaxTextureSRVs) { + assert_always(); + return 1 + (kMaxTextureSRVs - 1); + } TextureSRV new_texture_srv; new_texture_srv.fetch_constant = fetch_constant; new_texture_srv.dimension = dimension; @@ -2887,10 +2893,63 @@ uint32_t DxbcShaderTranslator::FindOrAddTextureSRV(uint32_t fetch_constant, new_texture_srv.name = xe::format_string("xe_texture%u_%s", fetch_constant, dimension_name); uint32_t srv_register = 1 + uint32_t(texture_srvs_.size()); - texture_srvs_.push_back(new_texture_srv); + texture_srvs_.emplace_back(std::move(new_texture_srv)); return srv_register; } +uint32_t DxbcShaderTranslator::FindOrAddSamplerBinding( + uint32_t fetch_constant, TextureFilter mag_filter, TextureFilter min_filter, + TextureFilter mip_filter, AnisoFilter aniso_filter) { + // In Direct3D 12, anisotropic filtering implies linear filtering. + if (aniso_filter != AnisoFilter::kDisabled && + aniso_filter != AnisoFilter::kUseFetchConst) { + mag_filter = TextureFilter::kLinear; + min_filter = TextureFilter::kLinear; + mip_filter = TextureFilter::kLinear; + aniso_filter = std::min(aniso_filter, AnisoFilter::kMax_16_1); + } + + for (uint32_t i = 0; i < uint32_t(sampler_bindings_.size()); ++i) { + const SamplerBinding& sampler_binding = sampler_bindings_[i]; + if (sampler_binding.fetch_constant == fetch_constant && + sampler_binding.mag_filter == mag_filter && + sampler_binding.min_filter == min_filter && + sampler_binding.mip_filter == mip_filter && + sampler_binding.aniso_filter == aniso_filter) { + return i; + } + } + + if (sampler_bindings_.size() >= kMaxSamplerBindings) { + assert_always(); + return kMaxSamplerBindings - 1; + } + + std::ostringstream name; + name << "xe_sampler" << fetch_constant; + if (aniso_filter != AnisoFilter::kUseFetchConst) { + name << "_a" << (1u << uint32_t(aniso_filter)); + } + if (aniso_filter == AnisoFilter::kDisabled || + aniso_filter == AnisoFilter::kUseFetchConst) { + static const char* kFilterSuffixes[] = {"p", "l", "b", "f"}; + name << "_" << kFilterSuffixes[uint32_t(mag_filter)] + << kFilterSuffixes[uint32_t(min_filter)] + << kFilterSuffixes[uint32_t(mip_filter)]; + } + + SamplerBinding new_sampler_binding; + new_sampler_binding.fetch_constant = fetch_constant; + new_sampler_binding.mag_filter = mag_filter; + new_sampler_binding.min_filter = min_filter; + new_sampler_binding.mip_filter = mip_filter; + new_sampler_binding.aniso_filter = aniso_filter; + new_sampler_binding.name = name.str(); + uint32_t sampler_register = 1 + uint32_t(sampler_bindings_.size()); + sampler_bindings_.emplace_back(std::move(new_sampler_binding)); + return sampler_register; +} + void DxbcShaderTranslator::ProcessTextureFetchInstruction( const ParsedTextureFetchInstruction& instr) { CheckPredicate(instr.is_predicated, instr.predicate_condition); @@ -2927,7 +2986,11 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction( srv_register_3d = UINT32_MAX; } - // TODO(Triang3l): Sampler, actually sample instead of this stub. + uint32_t sampler_register = FindOrAddSamplerBinding( + tfetch_index, instr.attributes.mag_filter, instr.attributes.min_filter, + instr.attributes.mip_filter, instr.attributes.aniso_filter); + + // TODO(Triang3l): Actually sample instead of this stub. shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) | ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(8)); shader_code_.push_back( diff --git a/src/xenia/gpu/dxbc_shader_translator.h b/src/xenia/gpu/dxbc_shader_translator.h index eaae29dda..62ce07fb2 100644 --- a/src/xenia/gpu/dxbc_shader_translator.h +++ b/src/xenia/gpu/dxbc_shader_translator.h @@ -62,6 +62,11 @@ class DxbcShaderTranslator : public ShaderTranslator { uint32_t color_output_map[4]; }; + // 96 textures at most because there are 32 fetch constants, and textures can + // be 2D array, 3D or cube. + static constexpr uint32_t kMaxTextureSRVIndexBits = 7; + static constexpr uint32_t kMaxTextureSRVs = + (1 << kMaxTextureSRVIndexBits) - 1; struct TextureSRV { uint32_t fetch_constant; TextureDimension dimension; @@ -73,6 +78,17 @@ class DxbcShaderTranslator : public ShaderTranslator { return texture_srvs_.data(); } + // Arbitrary limit - there can't be more than 2048 in a shader-visible + // descriptor heap, though some older hardware (tier 1 resource binding - + // Nvidia Fermi) doesn't support more than 16 samplers bound at once (we can't + // really do anything if a game uses more than 16), but just to have some + // limit so sampler count can easily be packed into 32-bit map keys (for + // instance, for root signatures). But shaders can specify overrides for + // filtering modes, and the number of possible combinations is huge - let's + // limit it to something sane. + static constexpr uint32_t kMaxSamplerBindingIndexBits = 7; + static constexpr uint32_t kMaxSamplerBindings = + (1 << kMaxSamplerBindingIndexBits) - 1; struct SamplerBinding { uint32_t fetch_constant; TextureFilter mag_filter; @@ -341,6 +357,12 @@ class DxbcShaderTranslator : public ShaderTranslator { // Returns T#/t# index (they are the same in this translator). uint32_t FindOrAddTextureSRV(uint32_t fetch_constant, TextureDimension dimension); + // Returns S#/s# index (they are the same in this translator). + uint32_t FindOrAddSamplerBinding(uint32_t fetch_constant, + TextureFilter mag_filter, + TextureFilter min_filter, + TextureFilter mip_filter, + AnisoFilter aniso_filter); void ProcessVectorAluInstruction(const ParsedAluInstruction& instr); void ProcessScalarAluInstruction(const ParsedAluInstruction& instr);