From c6451b414907bfe091890d6f94db759f3ae1801e Mon Sep 17 00:00:00 2001 From: Grien25 <96752609+Grien25@users.noreply.github.com> Date: Fri, 22 Aug 2025 23:46:46 -0700 Subject: [PATCH] Add resolve logging and improve BC4/BC5 SNORM Support --- AGENTS.md | 42 +++++++++++++++++++ CodexInfo.md | 33 +++++++++++++++ .../gpu/d3d12/d3d12_render_target_cache.cc | 23 ++++++++++ src/xenia/gpu/d3d12/d3d12_texture_cache.h | 16 +++---- src/xenia/gpu/render_target_cache.cc | 6 +++ .../gpu/vulkan/vulkan_render_target_cache.cc | 23 ++++++++++ src/xenia/gpu/vulkan/vulkan_texture_cache.cc | 34 ++++++++++++--- 7 files changed, 164 insertions(+), 13 deletions(-) create mode 100644 AGENTS.md create mode 100644 CodexInfo.md diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 000000000..f3396620d --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,42 @@ +# Repository Guidelines + +## Project Structure & Module Organization +- `src/xenia/`: Core C++20 sources. Key modules include `app/`, `cpu/`, `gpu/`, `kernel/`, `apu/`, `hid/`, `ui/`, `vfs/`. +- `src/xenia/base/testing/`: Unit tests (Catch2). +- `docs/`: Build and style docs (`docs/building.md`, `docs/style_guide.md`). +- `tools/`: Build tooling (Premake, helpers). The `xb` launcher symlinks to `xenia-build.py`. +- `third_party/`: Vendored dependencies. +- `assets/`: Icons and other assets. +- Build outputs: `build/` (generated projects, `build/bin/...`). + +## Build, Test, and Development Commands +- Setup: `./xb setup` — initialize toolchains and submodules. +- Build: `./xb build --config=Release` — compile (use `Checked`/`Debug` as needed). +- IDE files: `./xb premake` or `./xb devenv` — generate/open projects (VS/Xcode/CMake). +- Format: `./xb format` — run clang-format over the tree. +- Tests: `./xb test` — run unit tests; GPU tests via `./xb gputest`. +- Update: `./xb pull` — fetch/rebase and refresh submodules/projects. +Notes: Windows requires Visual Studio 2022; Linux uses Clang 19 and up-to-date Vulkan drivers. 
+ +## Coding Style & Naming Conventions +- clang-format (Google style). Run `./xb format` before committing. +- 2-space indentation, LF endings, ~80-column preference. +- Follow Google C++ naming; attribute TODOs as `// TODO(yourgithubname): ...`. +- Sort includes per the style guide; avoid tabs. See `docs/style_guide.md`. + +## Testing Guidelines +- Framework: Catch2 (`third_party/catch`). +- Location & names: place tests near the code or in `src/xenia/*/testing/`, named `*_test.cc`. +- Run locally with `./xb test`; keep tests deterministic and focused. +- Add GPU tests only when relevant and guard with appropriate configurations. + +## Commit & Pull Request Guidelines +- Message style: concise, imperative, and scoped, e.g., `[GPU] Fix index buffer overrun` or `[Kernel] Guard XAM attach path`. +- Keep history clean; each commit builds and passes format/tests. +- PRs: include a clear description, reproduction steps, linked issues, and platform details (OS, GPU/driver, backend). Attach logs when investigating runtime issues (`--log_file=stdout` for console output). +- Legal/ethics: do not use or reference Xbox XDK materials; never include game content. Follow `.github/CONTRIBUTING.md`. + +## Security & Configuration Tips +- Prefer `--flagfile=flags.txt` for repeatable runs. Avoid hard-coded game IDs or per-title hacks in code; use configuration variables instead. +- On Windows debugging, set `Command` to `$(SolutionDir)$(TargetPath)` and `Working Directory` to `$(SolutionDir)..\..`. + diff --git a/CodexInfo.md b/CodexInfo.md new file mode 100644 index 000000000..2b80b15cd --- /dev/null +++ b/CodexInfo.md @@ -0,0 +1,33 @@ +I’m thinking about how to toggle sRGB gamma. I should mention the cvar +--gamma_render_target_as_srgb=true if scenes appear too dark. Also, the +--readback_resolve=false option is important to bring up, as it’s false by +default. 
It might be a good idea to suggest capturing logs to confirm shader +selection and check for any failed memory region messages. + +Alright, I’ll compile this into a concise checklist for clarity! + +codex +Agreed — we’re in good shape to retest now. Here’s a quick checklist: + +- Build: + - ./xb build --config=Release +- D3D12 run (Windows): + - xenia_canary.exe --gpu=d3d12 --log_resolves=true --d3d12_edram_rov=false + + - Then retry with --d3d12_edram_rov=true for comparison. +- Vulkan run: + - xenia_canary.exe --gpu=vulkan --log_resolves=true +- What to watch: + - No “Failed to obtain the resolve destination memory region” errors. + - Resolve logs show shader names and dispatch group sizes; gameplay no +longer white/blank; models/textures persist. + - Lighting improves from BC5/BC4 SNORM mapping (normals look correct). + - VRAM usage plateaus rather than steadily growing. +- If still dark/muted: + - Try --gamma_render_target_as_srgb=true to test gamma targets as sRGB. +- Optional: + - Add --trace_gpu to confirm resolve passes output valid color each +dispatch. + +If you want, I can gate logs by title ID or add brief rate-limiting after you +validate. \ No newline at end of file diff --git a/src/xenia/gpu/d3d12/d3d12_render_target_cache.cc b/src/xenia/gpu/d3d12/d3d12_render_target_cache.cc index fb4934924..de5821c31 100644 --- a/src/xenia/gpu/d3d12/d3d12_render_target_cache.cc +++ b/src/xenia/gpu/d3d12/d3d12_render_target_cache.cc @@ -1349,6 +1349,21 @@ bool D3D12RenderTargetCache::Resolve(const Memory& memory, // Copying. 
bool copied = false; if (resolve_info.copy_dest_extent_length) { + if (cvars::log_resolves) { + uint32_t span_base = 0, span_row_len = 0, span_rows = 0, span_pitch = 0; + resolve_info.GetCopyEdramTileSpan(span_base, span_row_len, span_rows, + span_pitch); + XELOGGPU( + "Resolve(D3D12): copy_len=%u dest=[%u..%u) tiles base=%u row_len=%u " + "rows=%u pitch=%u clearD=%d clearC=%d", + resolve_info.copy_dest_extent_length, + resolve_info.copy_dest_extent_start, + resolve_info.copy_dest_extent_start + + resolve_info.copy_dest_extent_length, + span_base, span_row_len, span_rows, span_pitch, + int(resolve_info.IsClearingDepth()), + int(resolve_info.IsClearingColor())); + } if (GetPath() == Path::kHostRenderTargets) { // Dump the current contents of the render targets owning the affected // range to edram_buffer_. @@ -1372,6 +1387,11 @@ bool D3D12RenderTargetCache::Resolve(const Memory& memory, if (copy_shader != draw_util::ResolveCopyShaderIndex::kUnknown) { const draw_util::ResolveCopyShaderInfo& copy_shader_info = draw_util::resolve_copy_shader_info[size_t(copy_shader)]; + if (cvars::log_resolves) { + XELOGGPU("Resolve(D3D12): shader=%s groups=(%u,%u)", + copy_shader_info.debug_name, copy_group_count_x, + copy_group_count_y); + } // Make sure there is memory to write to. bool copy_dest_committed; @@ -1465,6 +1485,9 @@ bool D3D12RenderTargetCache::Resolve(const Memory& memory, command_processor_.SetExternalPipeline( resolve_copy_pipelines_[size_t(copy_shader)]); command_processor_.SubmitBarriers(); + if (cvars::log_resolves) { + XELOGGPU("Resolve(D3D12): dispatch %s", copy_shader_info.debug_name); + } command_list.D3DDispatch(copy_group_count_x, copy_group_count_y, 1); // Order the resolve with other work using the destination as a UAV. 
diff --git a/src/xenia/gpu/d3d12/d3d12_texture_cache.h b/src/xenia/gpu/d3d12/d3d12_texture_cache.h index 347772bb6..ad3b6c324 100644 --- a/src/xenia/gpu/d3d12/d3d12_texture_cache.h +++ b/src/xenia/gpu/d3d12/d3d12_texture_cache.h @@ -451,9 +451,9 @@ class D3D12TextureCache final : public TextureCache { {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG}, - // k_DXN - {DXGI_FORMAT_BC5_UNORM, DXGI_FORMAT_BC5_UNORM, kLoadShaderIndex128bpb, - DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, true, + // k_DXN (BC5). Use SNORM when the guest requests signed components. + {DXGI_FORMAT_BC5_TYPELESS, DXGI_FORMAT_BC5_UNORM, kLoadShaderIndex128bpb, + DXGI_FORMAT_BC5_SNORM, kLoadShaderIndexUnknown, true, DXGI_FORMAT_R8G8_UNORM, kLoadShaderIndexDXNToRG8, xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG}, // k_8_8_8_8_AS_16_16_16_16 @@ -501,10 +501,12 @@ class D3D12TextureCache final : public TextureCache { {DXGI_FORMAT_R8_UNORM, DXGI_FORMAT_R8_UNORM, kLoadShaderIndexDXT3A, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR}, - // k_DXT5A - {DXGI_FORMAT_BC4_UNORM, DXGI_FORMAT_BC4_UNORM, kLoadShaderIndex64bpb, - DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, true, DXGI_FORMAT_R8_UNORM, - kLoadShaderIndexDXT5AToR8, xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR}, + // k_DXT5A (BC4). Use SNORM when the guest requests signed components. + {DXGI_FORMAT_BC4_TYPELESS, DXGI_FORMAT_BC4_UNORM, kLoadShaderIndex64bpb, + DXGI_FORMAT_BC4_SNORM, kLoadShaderIndexUnknown, true, + // Fallback for devices without BC4 support. 
+ DXGI_FORMAT_R8_UNORM, kLoadShaderIndexDXT5AToR8, + xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR}, // k_CTX1 {DXGI_FORMAT_R8G8_UNORM, DXGI_FORMAT_R8G8_UNORM, kLoadShaderIndexCTX1, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN, diff --git a/src/xenia/gpu/render_target_cache.cc b/src/xenia/gpu/render_target_cache.cc index e8887535e..d4e33a1c3 100644 --- a/src/xenia/gpu/render_target_cache.cc +++ b/src/xenia/gpu/render_target_cache.cc @@ -199,6 +199,12 @@ DEFINE_bool( "between different render targets in separate EDRAM locations.", "GPU"); +// Optional resolve logging for debugging specific titles. +DEFINE_bool( + log_resolves, false, + "Log EDRAM resolve parameters and selected shaders for debugging.", + "GPU"); + namespace xe { namespace gpu { diff --git a/src/xenia/gpu/vulkan/vulkan_render_target_cache.cc b/src/xenia/gpu/vulkan/vulkan_render_target_cache.cc index 84336cabc..27b7289b5 100644 --- a/src/xenia/gpu/vulkan/vulkan_render_target_cache.cc +++ b/src/xenia/gpu/vulkan/vulkan_render_target_cache.cc @@ -1050,6 +1050,21 @@ bool VulkanRenderTargetCache::Resolve(const Memory& memory, // Copying. bool copied = false; if (resolve_info.copy_dest_extent_length) { + if (cvars::log_resolves) { + uint32_t span_base = 0, span_row_len = 0, span_rows = 0, span_pitch = 0; + resolve_info.GetCopyEdramTileSpan(span_base, span_row_len, span_rows, + span_pitch); + XELOGGPU( + "Resolve(VK): copy_len=%u dest=[%u..%u) tiles base=%u row_len=%u rows=%u " + "pitch=%u clearD=%d clearC=%d", + resolve_info.copy_dest_extent_length, + resolve_info.copy_dest_extent_start, + resolve_info.copy_dest_extent_start + + resolve_info.copy_dest_extent_length, + span_base, span_row_len, span_rows, span_pitch, + int(resolve_info.IsClearingDepth()), + int(resolve_info.IsClearingColor())); + } if (GetPath() == Path::kHostRenderTargets) { // Dump the current contents of the render targets owning the affected // range to edram_buffer_. 
@@ -1073,6 +1088,11 @@ bool VulkanRenderTargetCache::Resolve(const Memory& memory, if (copy_shader != draw_util::ResolveCopyShaderIndex::kUnknown) { const draw_util::ResolveCopyShaderInfo& copy_shader_info = draw_util::resolve_copy_shader_info[size_t(copy_shader)]; + if (cvars::log_resolves) { + XELOGGPU("Resolve(VK): shader=%s groups=(%u,%u)", + copy_shader_info.debug_name, copy_group_count_x, + copy_group_count_y); + } // Make sure there is memory to write to. bool copy_dest_committed; @@ -1154,6 +1174,9 @@ bool VulkanRenderTargetCache::Resolve(const Memory& memory, sizeof(copy_shader_constants), &copy_shader_constants); } command_processor_.SubmitBarriers(true); + if (cvars::log_resolves) { + XELOGGPU("Resolve(VK): dispatch %s", copy_shader_info.debug_name); + } command_buffer.CmdVkDispatch(copy_group_count_x, copy_group_count_y, 1); diff --git a/src/xenia/gpu/vulkan/vulkan_texture_cache.cc b/src/xenia/gpu/vulkan/vulkan_texture_cache.cc index ea5e56f3e..1d2e13802 100644 --- a/src/xenia/gpu/vulkan/vulkan_texture_cache.cc +++ b/src/xenia/gpu/vulkan/vulkan_texture_cache.cc @@ -325,10 +325,10 @@ constexpr VulkanTextureCache::HostFormatPair {{kLoadShaderIndexUnknown}, {kLoadShaderIndexUnknown}, xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG}, - // k_DXN - // VK_FORMAT_BC5_UNORM_BLOCK is optional. + // k_DXN (BC5) + // VK_FORMAT_BC5_[U|S]NORM_BLOCK are optional. {{kLoadShaderIndex128bpb, VK_FORMAT_BC5_UNORM_BLOCK, true}, - {kLoadShaderIndexUnknown}, + {kLoadShaderIndex128bpb, VK_FORMAT_BC5_SNORM_BLOCK, true}, xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG}, // k_8_8_8_8_AS_16_16_16_16 {{kLoadShaderIndex32bpb, VK_FORMAT_R8G8B8A8_UNORM}, @@ -376,10 +376,10 @@ constexpr VulkanTextureCache::HostFormatPair {{kLoadShaderIndexDXT3A, VK_FORMAT_R8_UNORM}, {kLoadShaderIndexUnknown}, xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR}, - // k_DXT5A - // VK_FORMAT_BC4_UNORM_BLOCK is optional. + // k_DXT5A (BC4) + // VK_FORMAT_BC4_[U|S]NORM_BLOCK are optional. 
{{kLoadShaderIndex64bpb, VK_FORMAT_BC4_UNORM_BLOCK, true}, - {kLoadShaderIndexUnknown}, + {kLoadShaderIndex64bpb, VK_FORMAT_BC4_SNORM_BLOCK, true}, xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR}, // k_CTX1 {{kLoadShaderIndexCTX1, VK_FORMAT_R8G8_UNORM}, @@ -1853,6 +1853,17 @@ bool VulkanTextureCache::Initialize() { host_format_dxn.format_unsigned.format = VK_FORMAT_R8G8_UNORM; host_format_dxn.format_unsigned.block_compressed = false; } + // Check signed BC5 support; if unavailable, fall back to R8G8_SNORM. + if (host_format_dxn.format_signed.format == VK_FORMAT_BC5_SNORM_BLOCK) { + ifn.vkGetPhysicalDeviceFormatProperties( + physical_device, VK_FORMAT_BC5_SNORM_BLOCK, &format_properties); + if ((format_properties.optimalTilingFeatures & kLinearFilterFeatures) != + kLinearFilterFeatures) { + host_format_dxn.format_signed.load_shader = kLoadShaderIndexDXNToRG8; + host_format_dxn.format_signed.format = VK_FORMAT_R8G8_SNORM; + host_format_dxn.format_signed.block_compressed = false; + } + } HostFormatPair& host_format_dxt5a = host_formats_[uint32_t(xenos::TextureFormat::k_DXT5A)]; assert_true(host_format_dxt5a.format_unsigned.format == @@ -1865,6 +1876,17 @@ bool VulkanTextureCache::Initialize() { host_format_dxt5a.format_unsigned.format = VK_FORMAT_R8_UNORM; host_format_dxt5a.format_unsigned.block_compressed = false; } + // Check signed BC4 support; if unavailable, fall back to R8_SNORM. 
+ if (host_format_dxt5a.format_signed.format == VK_FORMAT_BC4_SNORM_BLOCK) { + ifn.vkGetPhysicalDeviceFormatProperties( + physical_device, VK_FORMAT_BC4_SNORM_BLOCK, &format_properties); + if ((format_properties.optimalTilingFeatures & kLinearFilterFeatures) != + kLinearFilterFeatures) { + host_format_dxt5a.format_signed.load_shader = kLoadShaderIndexDXT5AToR8; + host_format_dxt5a.format_signed.format = VK_FORMAT_R8_SNORM; + host_format_dxt5a.format_signed.block_compressed = false; + } + } // k_16, k_16_16, k_16_16_16_16 - UNORM / SNORM are optional, fall back to // SFLOAT, which is mandatory and is always filterable (the guest 16-bit // format is filterable, 16-bit fixed-point is the full texture filtering